Unverified commit 30679319, authored by zhanqiuhu and committed by GitHub
Browse files

[CI][KVConnector][Metrics] Update multi KV connector edge case according to...


[CI][KVConnector][Metrics] Update multi KV connector edge case according to prefill stats changes (#39808)
Signed-off-by: Zhanqiu Hu <zhu@redhat.com>
parent 240f2636
...@@ -297,7 +297,7 @@ def test_multi_block_correctness(): ...@@ -297,7 +297,7 @@ def test_multi_block_correctness():
def test_cold_decode_no_cache_hit_metrics(): def test_cold_decode_no_cache_hit_metrics():
"""Cold decode: external_kv_transfer==P, local_cache_hit==0.""" """Cold decode: external_kv_transfer==P, local_cache_hit==0, local_compute==0."""
n0 = _fetch_nixl_bytes(DECODE_HOST, DECODE_PORT) n0 = _fetch_nixl_bytes(DECODE_HOST, DECODE_PORT)
m0 = _fetch_decode_metrics() m0 = _fetch_decode_metrics()
proxy_text, P = _complete(proxy_client, MEDIUM_PROMPT) proxy_text, P = _complete(proxy_client, MEDIUM_PROMPT)
...@@ -312,8 +312,8 @@ def test_cold_decode_no_cache_hit_metrics(): ...@@ -312,8 +312,8 @@ def test_cold_decode_no_cache_hit_metrics():
assert d["external_kv_transfer"] == P, ( assert d["external_kv_transfer"] == P, (
f"expected external_kv_transfer={P}, got {d['external_kv_transfer']}" f"expected external_kv_transfer={P}, got {d['external_kv_transfer']}"
) )
assert d["local_compute"] == 1, ( assert d["local_compute"] == 0, (
f"expected local_compute=1, got {d['local_compute']}" f"expected local_compute=0, got {d['local_compute']}"
) )
assert d["local_cache_hit"] == 0, ( assert d["local_cache_hit"] == 0, (
f"expected local_cache_hit=0, got {d['local_cache_hit']}" f"expected local_cache_hit=0, got {d['local_cache_hit']}"
...@@ -341,15 +341,15 @@ def test_full_decode_gpu_cache_hit_metrics(): ...@@ -341,15 +341,15 @@ def test_full_decode_gpu_cache_hit_metrics():
print(f"FULL CACHE HIT: {P} tokens, cached={cached}, nixl={expected_nixl}") print(f"FULL CACHE HIT: {P} tokens, cached={cached}, nixl={expected_nixl}")
print(f" metrics delta: {d}, nixl_bytes_delta={n1 - n0}") print(f" metrics delta: {d}, nixl_bytes_delta={n1 - n0}")
assert len(proxy_text) > 0, "proxy returned empty response" assert len(proxy_text) > 0, "proxy returned empty response"
assert d["local_cache_hit"] == cached - 1, ( assert d["local_cache_hit"] == cached, (
f"expected local_cache_hit={cached - 1}, got {d['local_cache_hit']}" f"expected local_cache_hit={cached}, got {d['local_cache_hit']}"
) )
assert d["external_kv_transfer"] == expected_nixl, ( assert d["external_kv_transfer"] == expected_nixl, (
f"expected external_kv_transfer={expected_nixl}, " f"expected external_kv_transfer={expected_nixl}, "
f"got {d['external_kv_transfer']}" f"got {d['external_kv_transfer']}"
) )
assert d["local_compute"] == 1, ( assert d["local_compute"] == 0, (
f"expected local_compute=1 (recomputed last token), got {d['local_compute']}" f"expected local_compute=0, got {d['local_compute']}"
) )
assert n1 - n0 > 0, ( assert n1 - n0 > 0, (
f"expected nixl_bytes_transferred to increase (partial NIXL for " f"expected nixl_bytes_transferred to increase (partial NIXL for "
...@@ -383,11 +383,11 @@ def test_partial_decode_gpu_cache_hit_metrics(): ...@@ -383,11 +383,11 @@ def test_partial_decode_gpu_cache_hit_metrics():
f"expected external_kv_transfer={expected_nixl}, " f"expected external_kv_transfer={expected_nixl}, "
f"got {d['external_kv_transfer']}" f"got {d['external_kv_transfer']}"
) )
assert d["local_cache_hit"] == cached - 1, ( assert d["local_cache_hit"] == cached, (
f"expected local_cache_hit={cached - 1}, got {d['local_cache_hit']}" f"expected local_cache_hit={cached}, got {d['local_cache_hit']}"
) )
assert d["local_compute"] == 1, ( assert d["local_compute"] == 0, (
f"expected local_compute=1 (recomputed last token), got {d['local_compute']}" f"expected local_compute=0, got {d['local_compute']}"
) )
assert n1 - n0 > 0, ( assert n1 - n0 > 0, (
f"expected nixl_bytes_transferred to increase (NIXL for uncached " f"expected nixl_bytes_transferred to increase (NIXL for uncached "
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment