Unverified commit 10d9872d, authored by zhanqiuhu and committed by GitHub
Browse files

[CI][Metrics] Fix local_cache_hit assertion after prompt tokens metrics updates (#39709)


Signed-off-by: ZhanqiuHu <zhu@redhat.com>
parent ccd0d1d9
......@@ -341,8 +341,8 @@ def test_full_decode_gpu_cache_hit_metrics():
print(f"FULL CACHE HIT: {P} tokens, cached={cached}, nixl={expected_nixl}")
print(f" metrics delta: {d}, nixl_bytes_delta={n1 - n0}")
assert len(proxy_text) > 0, "proxy returned empty response"
-assert d["local_cache_hit"] == cached, (
-f"expected local_cache_hit={cached}, got {d['local_cache_hit']}"
+assert d["local_cache_hit"] == cached - 1, (
+f"expected local_cache_hit={cached - 1}, got {d['local_cache_hit']}"
)
assert d["external_kv_transfer"] == expected_nixl, (
f"expected external_kv_transfer={expected_nixl}, "
......@@ -383,8 +383,8 @@ def test_partial_decode_gpu_cache_hit_metrics():
f"expected external_kv_transfer={expected_nixl}, "
f"got {d['external_kv_transfer']}"
)
-assert d["local_cache_hit"] == cached, (
-f"expected local_cache_hit={cached}, got {d['local_cache_hit']}"
+assert d["local_cache_hit"] == cached - 1, (
+f"expected local_cache_hit={cached - 1}, got {d['local_cache_hit']}"
)
assert d["local_compute"] == 1, (
f"expected local_compute=1 (recomputed last token), got {d['local_compute']}"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment