Unverified commit 8b651fe9, authored by Keiven C and committed by GitHub
Browse files

test: make metrics_port mandatory in fetch_kvbm_metrics (#6009)


Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com>
Co-authored-by: Keiven Chang <keivenchang@users.noreply.github.com>
parent 91fb78cd
......@@ -770,10 +770,13 @@ class TestDeterminism:
)
return bench_process, bench_file, bench_log
def _wait_for_benchmark_activity(self, initial_offload: int) -> bool:
def _wait_for_benchmark_activity(
self, metrics_port: int, initial_offload: int
) -> bool:
"""Wait for benchmark to start creating offload activity.
Args:
metrics_port: Port number for the KVBM metrics endpoint
initial_offload: Initial offload block count to compare against
Returns:
......@@ -787,7 +790,7 @@ class TestDeterminism:
elapsed = (wait_iteration + 1) * 5
try:
current_metrics = fetch_kvbm_metrics()
current_metrics = fetch_kvbm_metrics(port=metrics_port)
current_offload = current_metrics.get("kvbm_offload_blocks_d2h", 0)
if current_offload > initial_offload:
......@@ -935,10 +938,11 @@ class TestDeterminism:
f"Exact matches: {exact_matches}/{num_requests} ({exact_matches/num_requests:.1%})"
)
def _show_final_kvbm_stats(self, initial_offload: int):
def _show_final_kvbm_stats(self, metrics_port: int, initial_offload: int):
"""Display final KVBM metrics and compare with initial state.
Args:
metrics_port: Port number for the KVBM metrics endpoint
initial_offload: Initial offload block count to compare against
Raises:
......@@ -948,7 +952,7 @@ class TestDeterminism:
print("FINAL KVBM STATS")
print(f"{'='*70}")
try:
final_metrics = fetch_kvbm_metrics()
final_metrics = fetch_kvbm_metrics(port=metrics_port)
final_offload = final_metrics.get("kvbm_offload_blocks_d2h", 0)
final_onboard = final_metrics.get("kvbm_onboard_blocks_h2d", 0)
......@@ -1030,7 +1034,7 @@ class TestDeterminism:
# Check initial metrics
print("\nChecking initial KVBM metrics...")
try:
initial_metrics = fetch_kvbm_metrics()
initial_metrics = fetch_kvbm_metrics(port=llm_server.metrics_port)
initial_offload = initial_metrics.get("kvbm_offload_blocks_d2h", 0)
print(f"Initial offload: {initial_offload} blocks")
except Exception as e:
......@@ -1038,7 +1042,9 @@ class TestDeterminism:
initial_offload = 0
# Wait for benchmark activity
benchmark_started = self._wait_for_benchmark_activity(initial_offload)
benchmark_started = self._wait_for_benchmark_activity(
llm_server.metrics_port, initial_offload
)
if not benchmark_started:
pytest.fail(
"Benchmark failed to start or create offload activity. "
......@@ -1116,7 +1122,7 @@ class TestDeterminism:
)
# Show final KVBM stats
self._show_final_kvbm_stats(initial_offload)
self._show_final_kvbm_stats(llm_server.metrics_port, initial_offload)
finally:
print("\nStopping benchmark...")
......@@ -1292,7 +1298,7 @@ def parse_kvbm_metrics(metrics_text: str) -> dict:
return metrics
def fetch_kvbm_metrics(port: Optional[int] = None, timeout: int = 10) -> dict:
def fetch_kvbm_metrics(port: int, timeout: int = 10) -> dict:
"""Fetch and parse KVBM metrics from the metrics endpoint.
Args:
......@@ -1303,14 +1309,8 @@ def fetch_kvbm_metrics(port: Optional[int] = None, timeout: int = 10) -> dict:
Dictionary of parsed metrics
Raises:
ValueError: If port is not provided
RuntimeError: If metrics endpoint is unreachable or returns error
"""
if port is None:
raise ValueError(
"port must be provided explicitly. "
"Hardcoded default port is not supported for pytest-xdist compatibility."
)
response = requests.get(f"http://localhost:{port}/metrics", timeout=timeout)
if response.status_code != 200:
raise RuntimeError(
......
......@@ -60,10 +60,20 @@ def print_phase(phase_num: int, description: str) -> None:
print(f"\n=== Phase {phase_num}: {description} ===")
def check_kvbm_metrics(phase_name: str) -> dict[str, int]:
"""Fetch and display KVBM metrics."""
def check_kvbm_metrics(phase_name: str, metrics_port: int) -> dict[str, int]:
"""Fetch and display KVBM metrics.
Args:
phase_name: Name of the test phase for logging
metrics_port: Port number for the KVBM metrics endpoint
Returns:
Dictionary containing KVBM metrics with keys:
- kvbm_offload_blocks_d2h: Blocks offloaded from GPU to CPU
- kvbm_onboard_blocks_h2d: Blocks onboarded from CPU to GPU
"""
print(f"\n--- Checking KVBM metrics after {phase_name} ---")
metrics = fetch_kvbm_metrics()
metrics = fetch_kvbm_metrics(port=metrics_port)
offload_d2h = metrics.get("kvbm_offload_blocks_d2h", 0)
onboard_h2d = metrics.get("kvbm_onboard_blocks_h2d", 0)
......@@ -140,7 +150,7 @@ def test_chunked_prefill_offload(tester, llm_server_kvbm): # noqa: F811
response_1 = tester.make_request(LONG_PROMPT, max_tokens=MAX_TOKENS)
print(f"Response 1: {response_1}")
metrics_p1 = check_kvbm_metrics("Phase 1")
metrics_p1 = check_kvbm_metrics("Phase 1", llm_server_kvbm.metrics_port)
# Verify offload occurred
offloaded_blocks = metrics_p1["kvbm_offload_blocks_d2h"]
......@@ -178,7 +188,7 @@ def test_chunked_prefill_offload(tester, llm_server_kvbm): # noqa: F811
response_2 = tester.make_request(LONG_PROMPT, max_tokens=MAX_TOKENS)
print(f"Response 2: {response_2}")
metrics_p3 = check_kvbm_metrics("Phase 3")
metrics_p3 = check_kvbm_metrics("Phase 3", llm_server_kvbm.metrics_port)
# Verify onboarding occurred
onboarded_blocks = metrics_p3["kvbm_onboard_blocks_h2d"]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment