"vscode:/vscode.git/clone" did not exist on "c8ab988b15af5e30e87c6eb27a0ededf0377ac9e"
Unverified commit 30679319, authored by zhanqiuhu, committed by GitHub
Browse files

[CI][KVConnector][Metrics] Update multi KV connector edge case according to...


[CI][KVConnector][Metrics] Update multi KV connector edge case according to prefill stats changes (#39808)
Signed-off-by: Zhanqiu Hu <zhu@redhat.com>
parent 240f2636
......@@ -297,7 +297,7 @@ def test_multi_block_correctness():
def test_cold_decode_no_cache_hit_metrics():
"""Cold decode: external_kv_transfer==P, local_cache_hit==0."""
"""Cold decode: external_kv_transfer==P, local_cache_hit==0, local_compute==0."""
n0 = _fetch_nixl_bytes(DECODE_HOST, DECODE_PORT)
m0 = _fetch_decode_metrics()
proxy_text, P = _complete(proxy_client, MEDIUM_PROMPT)
......@@ -312,8 +312,8 @@ def test_cold_decode_no_cache_hit_metrics():
assert d["external_kv_transfer"] == P, (
f"expected external_kv_transfer={P}, got {d['external_kv_transfer']}"
)
assert d["local_compute"] == 1, (
f"expected local_compute=1, got {d['local_compute']}"
assert d["local_compute"] == 0, (
f"expected local_compute=0, got {d['local_compute']}"
)
assert d["local_cache_hit"] == 0, (
f"expected local_cache_hit=0, got {d['local_cache_hit']}"
......@@ -341,15 +341,15 @@ def test_full_decode_gpu_cache_hit_metrics():
print(f"FULL CACHE HIT: {P} tokens, cached={cached}, nixl={expected_nixl}")
print(f" metrics delta: {d}, nixl_bytes_delta={n1 - n0}")
assert len(proxy_text) > 0, "proxy returned empty response"
assert d["local_cache_hit"] == cached - 1, (
f"expected local_cache_hit={cached - 1}, got {d['local_cache_hit']}"
assert d["local_cache_hit"] == cached, (
f"expected local_cache_hit={cached}, got {d['local_cache_hit']}"
)
assert d["external_kv_transfer"] == expected_nixl, (
f"expected external_kv_transfer={expected_nixl}, "
f"got {d['external_kv_transfer']}"
)
assert d["local_compute"] == 1, (
f"expected local_compute=1 (recomputed last token), got {d['local_compute']}"
assert d["local_compute"] == 0, (
f"expected local_compute=0, got {d['local_compute']}"
)
assert n1 - n0 > 0, (
f"expected nixl_bytes_transferred to increase (partial NIXL for "
......@@ -383,11 +383,11 @@ def test_partial_decode_gpu_cache_hit_metrics():
f"expected external_kv_transfer={expected_nixl}, "
f"got {d['external_kv_transfer']}"
)
assert d["local_cache_hit"] == cached - 1, (
f"expected local_cache_hit={cached - 1}, got {d['local_cache_hit']}"
assert d["local_cache_hit"] == cached, (
f"expected local_cache_hit={cached}, got {d['local_cache_hit']}"
)
assert d["local_compute"] == 1, (
f"expected local_compute=1 (recomputed last token), got {d['local_compute']}"
assert d["local_compute"] == 0, (
f"expected local_compute=0, got {d['local_compute']}"
)
assert n1 - n0 > 0, (
f"expected nixl_bytes_transferred to increase (NIXL for uncached "
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment