Unverified commit 8b651fe9, authored by Keiven C, committed by GitHub
Browse files

test: make metrics_port mandatory in fetch_kvbm_metrics (#6009)


Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com>
Co-authored-by: Keiven Chang <keivenchang@users.noreply.github.com>
parent 91fb78cd
...@@ -770,10 +770,13 @@ class TestDeterminism: ...@@ -770,10 +770,13 @@ class TestDeterminism:
) )
return bench_process, bench_file, bench_log return bench_process, bench_file, bench_log
def _wait_for_benchmark_activity(self, initial_offload: int) -> bool: def _wait_for_benchmark_activity(
self, metrics_port: int, initial_offload: int
) -> bool:
"""Wait for benchmark to start creating offload activity. """Wait for benchmark to start creating offload activity.
Args: Args:
metrics_port: Port number for the KVBM metrics endpoint
initial_offload: Initial offload block count to compare against initial_offload: Initial offload block count to compare against
Returns: Returns:
...@@ -787,7 +790,7 @@ class TestDeterminism: ...@@ -787,7 +790,7 @@ class TestDeterminism:
elapsed = (wait_iteration + 1) * 5 elapsed = (wait_iteration + 1) * 5
try: try:
current_metrics = fetch_kvbm_metrics() current_metrics = fetch_kvbm_metrics(port=metrics_port)
current_offload = current_metrics.get("kvbm_offload_blocks_d2h", 0) current_offload = current_metrics.get("kvbm_offload_blocks_d2h", 0)
if current_offload > initial_offload: if current_offload > initial_offload:
...@@ -935,10 +938,11 @@ class TestDeterminism: ...@@ -935,10 +938,11 @@ class TestDeterminism:
f"Exact matches: {exact_matches}/{num_requests} ({exact_matches/num_requests:.1%})" f"Exact matches: {exact_matches}/{num_requests} ({exact_matches/num_requests:.1%})"
) )
def _show_final_kvbm_stats(self, initial_offload: int): def _show_final_kvbm_stats(self, metrics_port: int, initial_offload: int):
"""Display final KVBM metrics and compare with initial state. """Display final KVBM metrics and compare with initial state.
Args: Args:
metrics_port: Port number for the KVBM metrics endpoint
initial_offload: Initial offload block count to compare against initial_offload: Initial offload block count to compare against
Raises: Raises:
...@@ -948,7 +952,7 @@ class TestDeterminism: ...@@ -948,7 +952,7 @@ class TestDeterminism:
print("FINAL KVBM STATS") print("FINAL KVBM STATS")
print(f"{'='*70}") print(f"{'='*70}")
try: try:
final_metrics = fetch_kvbm_metrics() final_metrics = fetch_kvbm_metrics(port=metrics_port)
final_offload = final_metrics.get("kvbm_offload_blocks_d2h", 0) final_offload = final_metrics.get("kvbm_offload_blocks_d2h", 0)
final_onboard = final_metrics.get("kvbm_onboard_blocks_h2d", 0) final_onboard = final_metrics.get("kvbm_onboard_blocks_h2d", 0)
...@@ -1030,7 +1034,7 @@ class TestDeterminism: ...@@ -1030,7 +1034,7 @@ class TestDeterminism:
# Check initial metrics # Check initial metrics
print("\nChecking initial KVBM metrics...") print("\nChecking initial KVBM metrics...")
try: try:
initial_metrics = fetch_kvbm_metrics() initial_metrics = fetch_kvbm_metrics(port=llm_server.metrics_port)
initial_offload = initial_metrics.get("kvbm_offload_blocks_d2h", 0) initial_offload = initial_metrics.get("kvbm_offload_blocks_d2h", 0)
print(f"Initial offload: {initial_offload} blocks") print(f"Initial offload: {initial_offload} blocks")
except Exception as e: except Exception as e:
...@@ -1038,7 +1042,9 @@ class TestDeterminism: ...@@ -1038,7 +1042,9 @@ class TestDeterminism:
initial_offload = 0 initial_offload = 0
# Wait for benchmark activity # Wait for benchmark activity
benchmark_started = self._wait_for_benchmark_activity(initial_offload) benchmark_started = self._wait_for_benchmark_activity(
llm_server.metrics_port, initial_offload
)
if not benchmark_started: if not benchmark_started:
pytest.fail( pytest.fail(
"Benchmark failed to start or create offload activity. " "Benchmark failed to start or create offload activity. "
...@@ -1116,7 +1122,7 @@ class TestDeterminism: ...@@ -1116,7 +1122,7 @@ class TestDeterminism:
) )
# Show final KVBM stats # Show final KVBM stats
self._show_final_kvbm_stats(initial_offload) self._show_final_kvbm_stats(llm_server.metrics_port, initial_offload)
finally: finally:
print("\nStopping benchmark...") print("\nStopping benchmark...")
...@@ -1292,7 +1298,7 @@ def parse_kvbm_metrics(metrics_text: str) -> dict: ...@@ -1292,7 +1298,7 @@ def parse_kvbm_metrics(metrics_text: str) -> dict:
return metrics return metrics
def fetch_kvbm_metrics(port: Optional[int] = None, timeout: int = 10) -> dict: def fetch_kvbm_metrics(port: int, timeout: int = 10) -> dict:
"""Fetch and parse KVBM metrics from the metrics endpoint. """Fetch and parse KVBM metrics from the metrics endpoint.
Args: Args:
...@@ -1303,14 +1309,8 @@ def fetch_kvbm_metrics(port: Optional[int] = None, timeout: int = 10) -> dict: ...@@ -1303,14 +1309,8 @@ def fetch_kvbm_metrics(port: Optional[int] = None, timeout: int = 10) -> dict:
Dictionary of parsed metrics Dictionary of parsed metrics
Raises: Raises:
ValueError: If port is not provided
RuntimeError: If metrics endpoint is unreachable or returns error RuntimeError: If metrics endpoint is unreachable or returns error
""" """
if port is None:
raise ValueError(
"port must be provided explicitly. "
"Hardcoded default port is not supported for pytest-xdist compatibility."
)
response = requests.get(f"http://localhost:{port}/metrics", timeout=timeout) response = requests.get(f"http://localhost:{port}/metrics", timeout=timeout)
if response.status_code != 200: if response.status_code != 200:
raise RuntimeError( raise RuntimeError(
......
...@@ -60,10 +60,20 @@ def print_phase(phase_num: int, description: str) -> None: ...@@ -60,10 +60,20 @@ def print_phase(phase_num: int, description: str) -> None:
print(f"\n=== Phase {phase_num}: {description} ===") print(f"\n=== Phase {phase_num}: {description} ===")
def check_kvbm_metrics(phase_name: str) -> dict[str, int]: def check_kvbm_metrics(phase_name: str, metrics_port: int) -> dict[str, int]:
"""Fetch and display KVBM metrics.""" """Fetch and display KVBM metrics.
Args:
phase_name: Name of the test phase for logging
metrics_port: Port number for the KVBM metrics endpoint
Returns:
Dictionary containing KVBM metrics with keys:
- kvbm_offload_blocks_d2h: Blocks offloaded from GPU to CPU
- kvbm_onboard_blocks_h2d: Blocks onboarded from CPU to GPU
"""
print(f"\n--- Checking KVBM metrics after {phase_name} ---") print(f"\n--- Checking KVBM metrics after {phase_name} ---")
metrics = fetch_kvbm_metrics() metrics = fetch_kvbm_metrics(port=metrics_port)
offload_d2h = metrics.get("kvbm_offload_blocks_d2h", 0) offload_d2h = metrics.get("kvbm_offload_blocks_d2h", 0)
onboard_h2d = metrics.get("kvbm_onboard_blocks_h2d", 0) onboard_h2d = metrics.get("kvbm_onboard_blocks_h2d", 0)
...@@ -140,7 +150,7 @@ def test_chunked_prefill_offload(tester, llm_server_kvbm): # noqa: F811 ...@@ -140,7 +150,7 @@ def test_chunked_prefill_offload(tester, llm_server_kvbm): # noqa: F811
response_1 = tester.make_request(LONG_PROMPT, max_tokens=MAX_TOKENS) response_1 = tester.make_request(LONG_PROMPT, max_tokens=MAX_TOKENS)
print(f"Response 1: {response_1}") print(f"Response 1: {response_1}")
metrics_p1 = check_kvbm_metrics("Phase 1") metrics_p1 = check_kvbm_metrics("Phase 1", llm_server_kvbm.metrics_port)
# Verify offload occurred # Verify offload occurred
offloaded_blocks = metrics_p1["kvbm_offload_blocks_d2h"] offloaded_blocks = metrics_p1["kvbm_offload_blocks_d2h"]
...@@ -178,7 +188,7 @@ def test_chunked_prefill_offload(tester, llm_server_kvbm): # noqa: F811 ...@@ -178,7 +188,7 @@ def test_chunked_prefill_offload(tester, llm_server_kvbm): # noqa: F811
response_2 = tester.make_request(LONG_PROMPT, max_tokens=MAX_TOKENS) response_2 = tester.make_request(LONG_PROMPT, max_tokens=MAX_TOKENS)
print(f"Response 2: {response_2}") print(f"Response 2: {response_2}")
metrics_p3 = check_kvbm_metrics("Phase 3") metrics_p3 = check_kvbm_metrics("Phase 3", llm_server_kvbm.metrics_port)
# Verify onboarding occurred # Verify onboarding occurred
onboarded_blocks = metrics_p3["kvbm_onboard_blocks_h2d"] onboarded_blocks = metrics_p3["kvbm_onboard_blocks_h2d"]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment