Unverified commit 07742cc2, authored by Yan Ru Pei, committed by GitHub
Browse files

feat: move standalone KV indexer into kv-router crate with HTTP integration tests (#6569)


Signed-off-by: PeaBrane <yanrpei@gmail.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
parent e7f3361e
......@@ -4,6 +4,7 @@
import logging
import os
import shutil
import subprocess
import tempfile
from pathlib import Path
from typing import Generator, Optional
......@@ -236,6 +237,30 @@ def predownload_tokenizers(pytestconfig):
os.environ.pop("HF_HUB_OFFLINE", None)
@pytest.fixture(scope="session")
def build_kv_indexer():
    """Compile the standalone KV indexer binary once for the test session.

    Invokes ``cargo build`` up front so that tests which later start the
    binary through ``cargo run`` do not pay the compilation cost. When the
    binary is already cached in ``target/`` the build is effectively a no-op.

    Raises:
        subprocess.CalledProcessError: If ``cargo build`` exits non-zero.
        subprocess.TimeoutExpired: If the build exceeds the 600 second limit.
    """
    # Package, feature and binary selection passed to cargo.
    target_flags = [
        "-p",
        "dynamo-kv-router",
        "--features",
        "indexer-bin",
        "--bin",
        "dynamo-kv-indexer",
    ]

    _logger.info("Building dynamo-kv-indexer binary (cached after first build)")
    subprocess.check_call(["cargo", "build", *target_flags], timeout=600)
    _logger.info("dynamo-kv-indexer binary ready")
@pytest.fixture(autouse=True)
def logger(request):
log_dir = resolve_test_output_path(request.node.name)
......
......@@ -13,7 +13,6 @@ from typing import TYPE_CHECKING, Any, Optional
import aiohttp
import nats
from dynamo._internal import start_kv_block_indexer
from dynamo.llm import KvRouter, KvRouterConfig
from dynamo.runtime import DistributedRuntime
from tests.utils.managed_process import ManagedProcess
......@@ -1354,6 +1353,7 @@ def _test_router_indexers_sync(
nats_server: Optional["NatsServer"] = None,
durable_kv_events: bool = False,
router_event_threads: int = 4,
standalone_indexer_url: Optional[str] = None,
):
"""Test that two KV routers have synchronized indexer states after processing requests.
......@@ -1401,6 +1401,15 @@ def _test_router_indexers_sync(
router_event_threads=router_event_threads,
)
# If standalone indexer mode, launch mockers one-by-one and register.
# We need to create a temporary endpoint just to discover worker IDs.
if standalone_indexer_url:
tmp_runtime = get_runtime(store_backend, request_plane)
tmp_endpoint = tmp_runtime.endpoint(
f"{engine_workers.namespace}.{engine_workers.component_name}.generate"
)
await engine_workers.launch_mockers_with_indexer(tmp_endpoint)
async def send_requests_to_router(router, num_requests, router_name, endpoint):
# Now send the actual requests
tasks = []
......@@ -1573,9 +1582,8 @@ def _test_router_indexers_sync(
await asyncio.sleep(1)
# Verify NATS object store bucket was created with snapshot
# Skip this verification for NATS interruption test since NATS restarts fresh
# (local indexer recovery doesn't rely on NATS persistence)
if not test_nats_interruption:
# Skip for NATS interruption test (restarts fresh), and standalone indexer (no JetStream)
if not test_nats_interruption and not standalone_indexer_url:
# Mirror the Rust bucket naming logic from subscriber.rs:
# component.subject() -> "namespace.{ns}.component.{comp}"
# then slugify (convert dots to dashes, lowercase, etc) and append "-radix-bucket"
......@@ -1651,38 +1659,30 @@ def _test_router_indexers_sync(
assert_event_dumps_equal(sorted_state1, sorted_state2, "Router 1", "Router 2")
logger.info("Successfully verified that both router states are equal")
# Verify standalone indexer builds the same tree (only for non-durable/NATS Core)
if not durable_kv_events:
logger.info("Starting standalone indexer and verifying tree state")
runtime3 = get_runtime(store_backend, request_plane)
endpoint3 = runtime3.endpoint(
f"{engine_workers.namespace}.{engine_workers.component_name}.generate"
)
await start_kv_block_indexer(endpoint3, block_size, kv_router_config)
# Verify standalone HTTP indexer builds the same tree (non-durable with ZMQ)
if standalone_indexer_url:
logger.info("Verifying standalone HTTP indexer tree state via /dump")
# Wait for the standalone indexer to sync events from workers
# Wait for ZMQ events to propagate to the indexer
await asyncio.sleep(3)
# Query the standalone indexer's tree via kv_indexer_query endpoint
# Note: reuse runtime3 to keep the standalone indexer's component alive
query_endpoint = runtime3.endpoint(
f"{engine_workers.namespace}.{engine_workers.component_name}.kv_indexer_query"
)
query_client = await query_endpoint.client()
await query_client.wait_for_instances()
stream = await query_client.generate("DumpTree", annotated=False)
response = await stream.__anext__()
standalone_state = response["TreeDump"]
async with aiohttp.ClientSession() as session:
async with session.get(f"{standalone_indexer_url}/dump") as resp:
assert resp.status == 200, f"GET /dump failed: {resp.status}"
standalone_state = await resp.json()
sorted_standalone = sorted(standalone_state, key=sort_key)
logger.info(f"Standalone indexer has {len(sorted_standalone)} events")
logger.info(f"Standalone HTTP indexer has {len(sorted_standalone)} events")
assert_event_dumps_equal(
sorted_state1, sorted_standalone, "Router 1", "Standalone"
)
logger.info(
"Successfully verified standalone indexer state matches router states"
"Successfully verified standalone HTTP indexer state matches router states"
)
elif not durable_kv_events:
logger.info(
"Skipping standalone indexer verification (no standalone_indexer_url)"
)
else:
logger.info(
......@@ -1690,8 +1690,8 @@ def _test_router_indexers_sync(
)
# Verify NATS consumers are created (while routers are still alive)
# Skip this for NATS interruption test since it uses local indexer (NATS Core, not JetStream)
if not test_nats_interruption:
# Skip for NATS interruption test and standalone indexer (neither uses JetStream)
if not test_nats_interruption and not standalone_indexer_url:
logger.info("Verifying NATS consumers exist for both routers")
component_subject = f"namespace.{engine_workers.namespace}.component.{engine_workers.component_name}"
slugified = component_subject.lower().replace(".", "-").replace("_", "-")
......@@ -1948,6 +1948,7 @@ def _test_router_decisions(
use_kv_events: bool = True,
durable_kv_events: bool = False,
router_event_threads: int = 4,
standalone_indexer_url: Optional[str] = None,
):
"""Validate cross-worker routing decisions based on longest prefix match and tree-size tiebreaking.
......@@ -1976,7 +1977,15 @@ def _test_router_decisions(
Raises:
AssertionError: If routing decisions don't match expected prefix/tiebreak logic
"""
# Create KvRouterConfig with lower snapshot threshold for testing
# Use async to manage the test flow
async def test_sync():
# If standalone indexer mode, launch mockers one-by-one and register.
# Must happen before KvRouter creation since KvRouter blocks until workers appear.
if standalone_indexer_url:
await engine_workers.launch_mockers_with_indexer(endpoint)
kv_router_config = KvRouterConfig(
router_snapshot_threshold=20,
use_kv_events=use_kv_events,
......@@ -1989,8 +1998,6 @@ def _test_router_decisions(
kv_router_config=kv_router_config,
)
# Use async to manage the test flow
async def test_sync():
# Workers register one instance per process (not per dp_rank)
expected_num_instances = engine_workers.num_workers
......@@ -2098,6 +2105,7 @@ def _test_router_decisions(
dp_rank_a,
dp_rank_b,
response_worker_ids,
A + C + D + F, # req4 tokens for standalone indexer /score verification
)
# Run the async test
......@@ -2108,6 +2116,7 @@ def _test_router_decisions(
dp_rank_a,
dp_rank_b,
response_worker_ids,
req4_tokens,
) = asyncio.run(test_sync())
# Verify request 4 routed to worker a (longest prefix match)
......@@ -2176,6 +2185,41 @@ def _test_router_decisions(
f"worker_b {worker_b_key} has {worker_b_events} events"
)
# Verify standalone indexer scores via HTTP POST /query
if standalone_indexer_url:
_dp_a = dp_rank_a if dp_rank_a is not None else 0
_dp_b = dp_rank_b if dp_rank_b is not None else 0
async def _verify_scores():
# Wait for ZMQ events to propagate to the indexer
await asyncio.sleep(3)
async with aiohttp.ClientSession() as session:
async with session.post(
f"{standalone_indexer_url}/query",
json={"token_ids": req4_tokens},
) as resp:
assert resp.status == 200, f"POST /query failed: {resp.status}"
scores = (await resp.json())["scores"]
id_a = str(worker_a_id)
id_b = str(worker_b_id)
dp_a = str(_dp_a)
dp_b = str(_dp_b)
score_a = scores[id_a][dp_a]
score_b = scores[id_b][dp_b]
logger.info(
f"Standalone indexer /query: {id_a}[{dp_a}]={score_a}, "
f"{id_b}[{dp_b}]={score_b}"
)
assert score_a > score_b, (
f"Expected instance {id_a} dp_rank {dp_a} score {score_a} > "
f"instance {id_b} dp_rank {dp_b} score {score_b} for req4 tokens"
)
asyncio.run(_verify_scores())
def _test_busy_threshold_endpoint(
engine_workers,
......
......@@ -12,10 +12,12 @@
# endpoint_path (e.g., "generate") as the routing key, causing handler collisions when multiple
# workers register the same endpoint. This is a test-only limitation; production deployments
# with separate processes per worker work correctly with TCP.
import asyncio
import logging
import os
from typing import Any, Dict, Optional
import aiohttp
import pytest
from tests.router.common import ( # utilities
......@@ -172,7 +174,12 @@ def _build_mocker_command(
class MockerProcess:
"""Manages mocker engine instances with shared tokio runtime via --num-workers."""
"""Manages mocker engine instances with shared tokio runtime via --num-workers.
When standalone_indexer=True, launches mockers one-by-one (each as --num-workers 1)
and runs a standalone HTTP KV indexer binary alongside them. Call launch_mockers_with_indexer()
in async context to start mockers and register their ZMQ ports with the indexer.
"""
def __init__(
self,
......@@ -182,6 +189,7 @@ class MockerProcess:
store_backend: str = "etcd",
request_plane: str = "nats",
zmq_kv_events: bool = False,
standalone_indexer: bool = False,
):
namespace_suffix = generate_random_suffix()
self.namespace = f"test-namespace-{namespace_suffix}"
......@@ -189,8 +197,17 @@ class MockerProcess:
self.endpoint = f"dyn://{self.namespace}.{self.component_name}.generate"
self.num_workers = num_mockers
self._zmq_kv_events_ports: list[int] = []
mocker_args = (mocker_args or {}).copy()
self._standalone_indexer = standalone_indexer
self._standalone_indexer_port: Optional[int] = None
self._indexer_process: Optional[ManagedProcess] = None
self._mocker_processes: list[ManagedProcess] = []
self._request = request
self._store_backend = store_backend
self._request_plane = request_plane
self._mocker_args_orig: Dict[str, Any] = (mocker_args or {}).copy()
self.worker_id_to_zmq_ports: dict[int, dict[int, str]] = {}
mocker_args = self._mocker_args_orig.copy()
# Store dp_size for DP-aware test functions
self.dp_size = mocker_args.get("dp_size")
# Alias for consistency with vLLM/SGLang workers
......@@ -206,12 +223,24 @@ class MockerProcess:
num_mockers * dp_size, BASE_PORT_ZMQ
)
bases = [self._zmq_kv_events_ports[i * dp_size] for i in range(num_mockers)]
if not standalone_indexer:
mocker_args["zmq_kv_events_ports"] = ",".join(str(p) for p in bases)
logger.info(
f"Allocated ZMQ KV event ports {self._zmq_kv_events_ports} "
f"(bases: {bases}) for {num_mockers} workers"
)
if standalone_indexer:
# Allocate a port for the standalone indexer HTTP server
self._standalone_indexer_port = allocate_ports(1, BASE_PORT)[0]
request.addfinalizer(
lambda: deallocate_ports([self._standalone_indexer_port])
if self._standalone_indexer_port
else None
)
# Don't build a single mocker command — we'll launch per-mocker in launch_mockers_with_indexer
self._process = None
else:
command = _build_mocker_command(
endpoint=self.endpoint,
store_backend=store_backend,
......@@ -234,15 +263,167 @@ class MockerProcess:
)
logger.info(
f"Created mocker process with {num_mockers} worker(s), endpoint: {self.endpoint}"
f"{', standalone_indexer=True' if standalone_indexer else ''}"
)
@property
def standalone_indexer_url(self) -> Optional[str]:
    """Base HTTP URL of the standalone indexer, or ``None`` if no port is set."""
    port = self._standalone_indexer_port
    if port is None:
        return None
    return f"http://localhost:{port}"
def __enter__(self):
    """Start the process appropriate for the configured mode; return ``self``.

    In the default mode the single pre-built mocker process is entered.
    In standalone-indexer mode only the indexer binary is started here;
    the mocker processes are started later by
    ``launch_mockers_with_indexer``.
    """
    if not self._standalone_indexer:
        logger.info(f"Starting mocker process with {self.num_workers} worker(s)")
        self._process.__enter__()
        return self

    # Standalone mode: launch the indexer binary via cargo.
    indexer_port = self._standalone_indexer_port
    block_size = self._mocker_args_orig.get("block_size", BLOCK_SIZE)

    cargo_invocation = [
        "cargo",
        "run",
        "-p",
        "dynamo-kv-router",
        "--features",
        "indexer-bin",
        "--bin",
        "dynamo-kv-indexer",
    ]
    # Everything after "--" is forwarded to the indexer binary itself.
    binary_args = ["--block-size", str(block_size), "--port", str(indexer_port)]

    self._indexer_process = ManagedProcess(
        command=[*cargo_invocation, "--", *binary_args],
        timeout=120,
        display_output=True,
        health_check_ports=[indexer_port],
        health_check_urls=[],
        log_dir=self._request.node.name,
        terminate_all_matching_process_names=False,
        display_name="dynamo-kv-indexer",
    )
    logger.info(f"Starting standalone indexer on port {indexer_port}")
    self._indexer_process.__enter__()
    # Mocker processes are intentionally not started here; see
    # launch_mockers_with_indexer.
    return self
async def launch_mockers_with_indexer(self, endpoint):
    """Launch mockers one-by-one and register each with the standalone indexer.

    For each of the ``self.num_workers`` mockers:
      1. Launch a mocker process with ``num_workers=1`` and its own ZMQ base port.
      2. Poll ``client.instance_ids()`` every 0.5 s (at most 120 attempts)
         until a worker ID that has not been seen before appears.
      3. For every dp_rank, POST ``{instance_id, endpoint, dp_rank}`` to the
         indexer's ``/register`` route.

    The discovered mapping ``worker_id -> {dp_rank: zmq_address}`` is stored
    in ``self.worker_id_to_zmq_ports``.

    Args:
        endpoint: The dynamo endpoint object used to discover worker IDs.

    Raises:
        RuntimeError: If a mocker does not register within the polling
            window, or if the indexer answers a registration with a status
            other than 201.
    """
    client = await endpoint.client()
    known_ids: set[int] = set()
    dp_size = self._mocker_args_orig.get("dp_size", 1)
    # Loop-invariant: every registration goes to the same indexer route.
    register_url = f"{self.standalone_indexer_url}/register"

    for i in range(self.num_workers):
        # Build per-mocker args with its own ZMQ base port
        mocker_args = self._mocker_args_orig.copy()
        base_port = self._zmq_kv_events_ports[i * dp_size]
        mocker_args["zmq_kv_events_ports"] = str(base_port)

        command = _build_mocker_command(
            endpoint=self.endpoint,
            store_backend=self._store_backend,
            num_workers=1,
            mocker_args=mocker_args,
        )

        env = os.environ.copy()
        env["DYN_REQUEST_PLANE"] = self._request_plane

        proc = ManagedProcess(
            command=command,
            env=env,
            timeout=60,
            display_output=True,
            health_check_ports=[],
            health_check_urls=[],
            log_dir=self._request.node.name,
            terminate_all_matching_process_names=False,
            display_name=f"mocker-{i}",
        )
        proc.__enter__()
        # Track immediately so __exit__ can stop it even if registration fails.
        self._mocker_processes.append(proc)

        # Poll for the new worker_id
        new_worker_id = None
        for _ in range(120):
            unseen_ids = set(client.instance_ids()) - known_ids
            if unseen_ids:
                new_worker_id = unseen_ids.pop()
                known_ids.add(new_worker_id)
                break
            await asyncio.sleep(0.5)

        if new_worker_id is None:
            raise RuntimeError(
                f"Timed out waiting for mocker {i} to register "
                f"(known_ids={known_ids})"
            )

        # Register each dp_rank endpoint with the standalone indexer
        zmq_addresses = {}
        async with aiohttp.ClientSession() as session:
            for dp_rank in range(dp_size):
                port = self._zmq_kv_events_ports[i * dp_size + dp_rank]
                # Named distinctly so the `endpoint` parameter is not shadowed.
                zmq_endpoint = f"tcp://127.0.0.1:{port}"
                zmq_addresses[str(dp_rank)] = zmq_endpoint
                payload = {
                    "instance_id": new_worker_id,
                    "endpoint": zmq_endpoint,
                    "dp_rank": dp_rank,
                }
                async with session.post(register_url, json=payload) as resp:
                    if resp.status != 201:
                        body = await resp.text()
                        raise RuntimeError(
                            f"Failed to register instance {new_worker_id} "
                            f"dp_rank {dp_rank}: {resp.status} {body}"
                        )

        self.worker_id_to_zmq_ports[new_worker_id] = zmq_addresses
        logger.info(
            f"Mocker {i}: worker_id={new_worker_id}, "
            f"zmq_addresses={zmq_addresses}"
        )

    logger.info(
        f"All {self.num_workers} mockers launched and registered with indexer"
    )
def __exit__(self, exc_type, exc_val, exc_tb):
logger.info("Stopping mocker process")
logger.info("Stopping mocker process(es)")
# Stop individual mocker processes (standalone_indexer mode)
for proc in self._mocker_processes:
try:
proc.__exit__(exc_type, exc_val, exc_tb)
except Exception as e:
logger.warning(f"Error stopping mocker process: {e}")
self._mocker_processes.clear()
# Stop standalone indexer
if self._indexer_process is not None:
try:
self._indexer_process.__exit__(exc_type, exc_val, exc_tb)
except Exception as e:
logger.warning(f"Error stopping indexer process: {e}")
self._indexer_process = None
# Stop single mocker process (non-standalone mode)
if self._process is not None:
self._process.__exit__(exc_type, exc_val, exc_tb)
if self._zmq_kv_events_ports:
deallocate_ports(self._zmq_kv_events_ports)
......@@ -566,6 +747,7 @@ def test_indexers_sync(
request,
runtime_services_dynamic_ports,
predownload_tokenizers,
build_kv_indexer,
file_storage_backend,
store_backend,
durable_kv_events,
......@@ -577,8 +759,7 @@ def test_indexers_sync(
Tests with three configurations:
- jetstream: etcd backend, JetStream for KV events, NATS request plane
- nats_core: etcd backend, local indexer with NATS Core, TCP request plane
(includes NATS interruption/recovery testing)
- nats_core: etcd backend, NATS Core with gap detection, TCP request plane
- file: file backend, JetStream for KV events, NATS request plane
"""
logger.info(
......@@ -622,6 +803,7 @@ def test_indexers_sync(
test_nats_interruption=not durable_kv_events,
nats_server=nats_process if not durable_kv_events else None,
durable_kv_events=durable_kv_events,
standalone_indexer_url=mockers.standalone_indexer_url,
)
logger.info("Indexers sync test completed successfully")
......@@ -680,6 +862,7 @@ def test_router_decisions(
request,
runtime_services_dynamic_ports,
predownload_tokenizers,
build_kv_indexer,
durable_kv_events,
use_kv_events,
request_plane,
......@@ -713,6 +896,7 @@ def test_router_decisions(
num_mockers=2,
request_plane=request_plane,
zmq_kv_events=zmq_kv_events,
standalone_indexer=zmq_kv_events,
) as mockers:
logger.info(f"All mockers using endpoint: {mockers.endpoint}")
......@@ -730,6 +914,7 @@ def test_router_decisions(
test_dp_rank=True,
use_kv_events=use_kv_events,
durable_kv_events=durable_kv_events,
standalone_indexer_url=mockers.standalone_indexer_url,
)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment