Unverified commit b6596c52, authored by Yan Ru Pei and committed by GitHub
Browse files

chore(kv-router): remove native kv-indexer binary, use maturin-built one (#7338)


Signed-off-by: PeaBrane <yanrpei@gmail.com>
parent 9e9ca3e2
......@@ -15,7 +15,7 @@ use serde::{Deserialize, Serialize};
use crate::protocols::{LocalBlockHash, WorkerId, compute_block_hash_for_seq};
use super::registry::{IndexerKey, WorkerRegistry};
use super::registry::{IndexerKey, ListenerControlError, WorkerRegistry};
pub struct AppState {
pub registry: Arc<WorkerRegistry>,
......@@ -51,12 +51,6 @@ pub struct UnregisterRequest {
pub dp_rank: Option<u32>,
}
/// JSON-serializable description of one worker, built by `list_workers` from
/// each `(instance_id, endpoints)` pair yielded by `registry.list()`.
#[derive(Serialize)]
struct WorkerInfo {
/// Identifier of the worker instance.
instance_id: WorkerId,
/// Endpoint strings keyed by a `u32` — presumably the worker's `dp_rank`;
/// TODO confirm against `WorkerRegistry::list`.
endpoints: HashMap<u32, String>,
}
#[derive(Deserialize)]
pub struct QueryRequest {
pub token_ids: Vec<u32>,
......@@ -86,6 +80,15 @@ async fn register(
State(state): State<Arc<AppState>>,
Json(req): Json<RegisterRequest>,
) -> impl IntoResponse {
if let Err(error) =
super::validate_listener_endpoints(&req.endpoint, req.replay_endpoint.as_deref())
{
return (
StatusCode::BAD_REQUEST,
Json(serde_json::json!({"error": error.to_string()})),
);
}
match state
.registry
.register(
......@@ -146,16 +149,7 @@ async fn unregister(
}
async fn list_workers(State(state): State<Arc<AppState>>) -> impl IntoResponse {
let workers: Vec<WorkerInfo> = state
.registry
.list()
.into_iter()
.map(|(instance_id, endpoints)| WorkerInfo {
instance_id,
endpoints,
})
.collect();
Json(workers)
Json(state.registry.list())
}
fn build_score_response(
......@@ -254,7 +248,6 @@ async fn query_by_hash(
}
}
#[cfg(feature = "test-endpoints")]
#[derive(Deserialize)]
struct ListenerControlRequest {
instance_id: WorkerId,
......@@ -262,7 +255,6 @@ struct ListenerControlRequest {
dp_rank: Option<u32>,
}
#[cfg(feature = "test-endpoints")]
async fn test_pause_listener(
State(state): State<Arc<AppState>>,
Json(req): Json<ListenerControlRequest>,
......@@ -272,14 +264,10 @@ async fn test_pause_listener(
.pause_listener(req.instance_id, req.dp_rank.unwrap_or(0))
{
Ok(()) => (StatusCode::OK, Json(serde_json::json!({"status": "ok"}))),
Err(e) => (
StatusCode::NOT_FOUND,
Json(serde_json::json!({"error": e.to_string()})),
),
Err(error) => listener_control_error_response(error),
}
}
#[cfg(feature = "test-endpoints")]
async fn test_resume_listener(
State(state): State<Arc<AppState>>,
Json(req): Json<ListenerControlRequest>,
......@@ -290,13 +278,26 @@ async fn test_resume_listener(
.await
{
Ok(()) => (StatusCode::OK, Json(serde_json::json!({"status": "ok"}))),
Err(e) => (
StatusCode::CONFLICT,
Json(serde_json::json!({"error": e.to_string()})),
),
Err(error) => listener_control_error_response(error),
}
}
fn listener_control_error_response(
error: ListenerControlError,
) -> (StatusCode, Json<serde_json::Value>) {
let status = match &error {
ListenerControlError::WorkerNotFound { .. }
| ListenerControlError::ListenerNotFound { .. } => StatusCode::NOT_FOUND,
ListenerControlError::DiscoveryManaged { .. }
| ListenerControlError::InvalidPauseState { .. }
| ListenerControlError::InvalidResumeState { .. } => StatusCode::CONFLICT,
};
(
status,
Json(serde_json::json!({"error": error.to_string()})),
)
}
#[derive(Deserialize)]
struct PeerRequest {
url: String,
......@@ -373,6 +374,7 @@ async fn handle_health() -> StatusCode {
#[cfg(feature = "metrics")]
async fn handle_metrics(State(state): State<Arc<AppState>>) -> impl IntoResponse {
state.registry.refresh_metrics();
let encoder = prometheus::TextEncoder::new();
let mut buf = Vec::new();
encoder
......@@ -401,12 +403,10 @@ pub fn create_router(state: Arc<AppState>) -> Router {
.route("/peers", get(list_peers))
.route("/health", get(handle_health));
#[cfg(feature = "test-endpoints")]
let router = router
.route("/test/pause_listener", post(test_pause_listener))
.route("/test/resume_listener", post(test_resume_listener));
let router = router.with_state(state.clone());
.route("/test/resume_listener", post(test_resume_listener))
.with_state(state.clone());
#[cfg(feature = "metrics")]
let router = {
......
......@@ -47,7 +47,7 @@ required-features = ["kv-router-stress"]
# repo
dynamo-runtime = { workspace = true }
dynamo-tokens = { workspace = true }
dynamo-kv-router = { workspace = true, features = ["metrics"] }
dynamo-kv-router = { workspace = true, features = ["metrics", "runtime-protocols"] }
dynamo-memory = { workspace = true }
dynamo-mocker = { workspace = true }
......@@ -201,4 +201,3 @@ tonic-build = { version = "0.13.1" }
name = "bench_local_transfer_v2"
path = "bin/bench_local_transfer_v2.rs"
required-features = ["block-manager-bench"]
......@@ -13,12 +13,10 @@
# endpoint_path (e.g., "generate") as the routing key, causing handler collisions when multiple
# workers register the same endpoint. This is a test-only limitation; production deployments
# with separate processes per worker work correctly with TCP.
import asyncio
import logging
import os
from typing import Any, Dict, Optional
import aiohttp
import pytest
from tests.router.common import (
......@@ -27,7 +25,6 @@ from tests.router.common import (
_test_router_basic,
_test_router_decisions,
_test_router_decisions_disagg,
_test_router_indexers_sync,
_test_router_overload_503,
_test_router_query_instance_id,
_test_router_two_routers,
......@@ -176,12 +173,7 @@ def _build_mocker_command(
class MockerProcess:
"""Manages mocker engine instances with shared tokio runtime via --num-workers.
When standalone_indexer=True, launches mockers one-by-one (each as --num-workers 1)
and runs a standalone HTTP KV indexer binary alongside them. Call launch_mockers_with_indexer()
in async context to start mockers and register their ZMQ ports with the indexer.
"""
"""Manages mocker engine instances with shared tokio runtime via --num-workers."""
def __init__(
self,
......@@ -191,7 +183,6 @@ class MockerProcess:
store_backend: str = "etcd",
request_plane: str = "nats",
zmq_kv_events: bool = False,
standalone_indexer: bool = False,
model_name: str = "mocker",
zmq_replay: bool = False,
):
......@@ -203,12 +194,6 @@ class MockerProcess:
self.num_workers = num_mockers
self._zmq_kv_events_ports: list[int] = []
self._zmq_replay_ports: list[int] = []
self._standalone_indexer = standalone_indexer
self._standalone_indexer_port: Optional[int] = None
self._standalone_indexer_b_port: Optional[int] = None
self._indexer_process: Optional[ManagedProcess] = None
self._indexer_b_process: Optional[ManagedProcess] = None
self._mocker_processes: list[ManagedProcess] = []
self._request = request
self._store_backend = store_backend
self._request_plane = request_plane
......@@ -231,7 +216,6 @@ class MockerProcess:
num_mockers * dp_size, BASE_PORT_ZMQ
)
bases = [self._zmq_kv_events_ports[i * dp_size] for i in range(num_mockers)]
if not standalone_indexer:
mocker_args["zmq_kv_events_ports"] = ",".join(str(p) for p in bases)
logger.info(
f"Allocated ZMQ KV event ports {self._zmq_kv_events_ports} "
......@@ -247,22 +231,12 @@ class MockerProcess:
replay_bases = [
self._zmq_replay_ports[i * dp_size] for i in range(num_mockers)
]
if not standalone_indexer:
mocker_args["zmq_replay_ports"] = ",".join(str(p) for p in replay_bases)
logger.info(
f"Allocated ZMQ replay ports {self._zmq_replay_ports} "
f"(bases: {replay_bases}) for {num_mockers} workers"
)
if standalone_indexer:
# Allocate ports for standalone indexer A and B (P2P recovery peer)
indexer_ports = allocate_ports(2, BASE_PORT)
self._standalone_indexer_port = indexer_ports[0]
self._standalone_indexer_b_port = indexer_ports[1]
request.addfinalizer(lambda: deallocate_ports(indexer_ports))
# Don't build a single mocker command — we'll launch per-mocker in launch_mockers_with_indexer
self._process = None
else:
command = _build_mocker_command(
endpoint=self.endpoint,
store_backend=store_backend,
......@@ -285,247 +259,15 @@ class MockerProcess:
)
logger.info(
f"Created mocker process with {num_mockers} worker(s), endpoint: {self.endpoint}"
f"{', standalone_indexer=True' if standalone_indexer else ''}"
)
@property
def standalone_indexer_url(self) -> Optional[str]:
    """Base HTTP URL of standalone indexer A, or ``None`` if no port was allocated."""
    port = self._standalone_indexer_port
    return None if port is None else f"http://localhost:{port}"
@property
def standalone_indexer_b_url(self) -> Optional[str]:
    """Base HTTP URL of standalone indexer B, or ``None`` if no port was allocated."""
    port = self._standalone_indexer_b_port
    return None if port is None else f"http://localhost:{port}"
def __enter__(self):
    """Start the process this manager owns up front and return ``self``.

    Without ``standalone_indexer`` the single mocker process is entered.
    With it, only the ``dynamo-kv-indexer`` binary is started here (through
    ``cargo run``); the mockers are started later by
    ``launch_mockers_with_indexer``.
    """
    if not self._standalone_indexer:
        logger.info(f"Starting mocker process with {self.num_workers} worker(s)")
        self._process.__enter__()
        return self

    # Standalone mode: bring up the indexer binary; mockers come later.
    indexer_port = self._standalone_indexer_port
    block_size = self._mocker_args_orig.get("block_size", BLOCK_SIZE)
    self._indexer_process = ManagedProcess(
        command=[
            "cargo",
            "run",
            "-p",
            "dynamo-kv-router",
            "--features",
            "indexer-bin,test-endpoints",
            "--bin",
            "dynamo-kv-indexer",
            "--",
            "--block-size",
            str(block_size),
            "--port",
            str(indexer_port),
        ],
        timeout=120,
        display_output=True,
        health_check_ports=[indexer_port],
        health_check_urls=[],
        log_dir=self._request.node.name,
        terminate_all_matching_process_names=False,
        display_name="dynamo-kv-indexer",
    )
    logger.info(f"Starting standalone indexer on port {indexer_port}")
    self._indexer_process.__enter__()
    return self
async def launch_mockers_with_indexer(self, endpoint):
    """Start the mockers one at a time, registering each with the standalone indexer.

    Per mocker:
      1. Start a mocker process with ``num_workers=1`` and its own ZMQ base port(s).
      2. Poll ``client.instance_ids()`` (up to 120 tries, 0.5 s apart) until an
         id that has not been seen before shows up.
      3. POST one ``/register`` request per dp_rank to the indexer; any reply
         other than HTTP 201 is treated as a failure.

    Args:
        endpoint: The dynamo endpoint object used to discover worker IDs.

    Raises:
        RuntimeError: if a mocker never shows up in ``instance_ids()`` or a
            registration request is rejected.
    """
    client = await endpoint.client()
    seen_ids: set[int] = set()
    dp_size = self._mocker_args_orig.get("dp_size", 1)
    block_size = self._mocker_args_orig.get("block_size", BLOCK_SIZE)

    for i in range(self.num_workers):
        first_slot = i * dp_size

        # Each mocker gets a private copy of the args with its own base port(s).
        per_mocker_args = self._mocker_args_orig.copy()
        per_mocker_args["zmq_kv_events_ports"] = str(
            self._zmq_kv_events_ports[first_slot]
        )
        if self._zmq_replay_ports:
            per_mocker_args["zmq_replay_ports"] = str(
                self._zmq_replay_ports[first_slot]
            )

        command = _build_mocker_command(
            endpoint=self.endpoint,
            store_backend=self._store_backend,
            num_workers=1,
            mocker_args=per_mocker_args,
        )

        env = os.environ.copy()
        env["DYN_REQUEST_PLANE"] = self._request_plane

        proc = ManagedProcess(
            command=command,
            env=env,
            timeout=60,
            display_output=True,
            health_check_ports=[],
            health_check_urls=[],
            log_dir=self._request.node.name,
            terminate_all_matching_process_names=False,
            display_name=f"mocker-{i}",
        )
        proc.__enter__()
        self._mocker_processes.append(proc)

        # Wait for the freshly started mocker to appear as a new instance id.
        for _ in range(120):
            fresh = set(client.instance_ids()) - seen_ids
            if fresh:
                new_worker_id = fresh.pop()
                seen_ids.add(new_worker_id)
                break
            await asyncio.sleep(0.5)
        else:
            raise RuntimeError(
                f"Timed out waiting for mocker {i} to register "
                f"(known_ids={seen_ids})"
            )

        # Register every dp_rank of this worker with the standalone indexer.
        zmq_addresses = {}
        register_url = f"{self.standalone_indexer_url}/register"
        async with aiohttp.ClientSession() as session:
            for dp_rank in range(dp_size):
                slot = first_slot + dp_rank
                zmq_endpoint = f"tcp://127.0.0.1:{self._zmq_kv_events_ports[slot]}"
                zmq_addresses[str(dp_rank)] = zmq_endpoint

                payload = {
                    "instance_id": new_worker_id,
                    "endpoint": zmq_endpoint,
                    "dp_rank": dp_rank,
                    "model_name": self.model_name,
                    "block_size": block_size,
                }
                if self._zmq_replay_ports:
                    payload[
                        "replay_endpoint"
                    ] = f"tcp://127.0.0.1:{self._zmq_replay_ports[slot]}"

                async with session.post(register_url, json=payload) as resp:
                    if resp.status != 201:
                        body = await resp.text()
                        raise RuntimeError(
                            f"Failed to register instance {new_worker_id} "
                            f"dp_rank {dp_rank}: {resp.status} {body}"
                        )

        self.worker_id_to_zmq_ports[new_worker_id] = zmq_addresses
        logger.info(
            f"Mocker {i}: worker_id={new_worker_id}, "
            f"zmq_addresses={zmq_addresses}"
        )

    logger.info(
        f"All {self.num_workers} mockers launched and registered with indexer"
    )
def launch_indexer(self):
    """Start standalone indexer B with ``--peers`` pointing at indexer A.

    The already-registered workers are handed over on the command line via
    ``--workers`` so that the ZMQ sockets connect before recovery runs; the
    subscription handshake then completes during the recovery delay and no
    events are lost to the ZMQ slow-joiner problem.

    Raises:
        RuntimeError: if the manager was not created with
            ``standalone_indexer=True`` or no workers have been registered yet.
    """
    standalone_ready = (
        self._standalone_indexer and self._standalone_indexer_b_port is not None
    )
    if not standalone_ready:
        raise RuntimeError("launch_indexer requires standalone_indexer=True")
    if not self.worker_id_to_zmq_ports:
        raise RuntimeError("launch_indexer requires workers to be registered first")

    block_size = self._mocker_args_orig.get("block_size", BLOCK_SIZE)
    peer_url = f"http://localhost:{self._standalone_indexer_port}"
    port_b = self._standalone_indexer_b_port

    # --workers format: "worker_id:dp_rank=zmq_addr,..."
    workers_arg = ",".join(
        f"{worker_id}:{dp_rank_str}={zmq_endpoint}"
        for worker_id, zmq_addresses in self.worker_id_to_zmq_ports.items()
        for dp_rank_str, zmq_endpoint in zmq_addresses.items()
    )

    self._indexer_b_process = ManagedProcess(
        command=[
            "cargo",
            "run",
            "-p",
            "dynamo-kv-router",
            "--features",
            "indexer-bin,test-endpoints",
            "--bin",
            "dynamo-kv-indexer",
            "--",
            "--block-size",
            str(block_size),
            "--port",
            str(port_b),
            "--peers",
            peer_url,
            "--workers",
            workers_arg,
            "--model-name",
            self.model_name,
        ],
        timeout=120,
        display_output=True,
        health_check_ports=[port_b],
        health_check_urls=[],
        log_dir=self._request.node.name,
        terminate_all_matching_process_names=False,
        display_name="dynamo-kv-indexer-b",
    )
    logger.info(
        f"Starting standalone indexer B on port {port_b} " f"with peer {peer_url}"
    )
    self._indexer_b_process.__enter__()
def __exit__(self, exc_type, exc_val, exc_tb):
logger.info("Stopping mocker process(es)")
# Stop individual mocker processes (standalone_indexer mode)
for proc in self._mocker_processes:
try:
proc.__exit__(exc_type, exc_val, exc_tb)
except Exception as e:
logger.warning(f"Error stopping mocker process: {e}")
self._mocker_processes.clear()
# Stop standalone indexer B (P2P recovery peer)
if self._indexer_b_process is not None:
try:
self._indexer_b_process.__exit__(exc_type, exc_val, exc_tb)
except Exception as e:
logger.warning(f"Error stopping indexer B process: {e}")
self._indexer_b_process = None
# Stop standalone indexer A
if self._indexer_process is not None:
try:
self._indexer_process.__exit__(exc_type, exc_val, exc_tb)
except Exception as e:
logger.warning(f"Error stopping indexer process: {e}")
self._indexer_process = None
# Stop single mocker process (non-standalone mode)
logger.info("Stopping mocker process")
if self._process is not None:
self._process.__exit__(exc_type, exc_val, exc_tb)
if self._zmq_kv_events_ports:
......@@ -844,92 +586,6 @@ def test_kv_router_bindings(
)
@pytest.mark.parametrize(
    "store_backend,durable_kv_events,request_plane",
    [
        ("etcd", True, "nats"),  # JetStream mode - uses JetStream
        ("etcd", False, "tcp"),  # NATS core mode (with gap detection) - no JetStream
        ("file", True, "nats"),  # File backend - uses JetStream
    ],
    ids=["jetstream", "nats_core", "file"],
    indirect=["request_plane", "durable_kv_events"],
)
@pytest.mark.timeout(300)
def test_indexers_sync(
    request,
    runtime_services_dynamic_ports,
    predownload_tokenizers,
    file_storage_backend,
    store_backend,
    durable_kv_events,
    request_plane,
):
    """
    Check that two KV routers end up with synchronized indexer state after
    processing requests, i.e. both converge to the same internal state.

    Parametrized over three configurations:
    - jetstream: etcd backend, JetStream for KV events, NATS request plane
    - nats_core: etcd backend, NATS Core with gap detection, TCP request plane
    - file: file backend, JetStream for KV events, NATS request plane
    """
    logger.info(
        f"Starting indexers sync test: store_backend={store_backend}, "
        f"durable_kv_events={durable_kv_events}, request_plane={request_plane}"
    )

    # The dynamic-port fixture avoids hardcoded localhost:4222/2379 in parallel runs.
    nats_process, _etcd_process = runtime_services_dynamic_ports

    # NATS interruption is only exercised on the non-durable path.
    interrupt_nats = not durable_kv_events

    # Two DP ranks exercise per-dp_rank event ID tracking and recovery.
    mocker_args = dict(
        speedup_ratio=SPEEDUP_RATIO,
        block_size=BLOCK_SIZE,
        durable_kv_events=durable_kv_events,
        dp_size=2,
    )

    mocker_manager = MockerProcess(
        request,
        mocker_args=mocker_args,
        num_mockers=NUM_MOCKERS,
        store_backend=store_backend,
        request_plane=request_plane,
        zmq_kv_events=True,
        zmq_replay=True,
        standalone_indexer=True,
        model_name=MODEL_NAME,
    )
    with mocker_manager as mockers:
        # 2 workers x 2 DP ranks = 4 independent event streams
        logger.info(f"Starting {NUM_MOCKERS} mocker instances with dp_size=2")
        logger.info(f"All mockers using endpoint: {mockers.endpoint}")

        # The shared implementation creates its own runtime per router and
        # performs consumer verification while the routers are still alive.
        # With durable_kv_events=True the router runs in JetStream mode.
        _test_router_indexers_sync(
            engine_workers=mockers,
            block_size=BLOCK_SIZE,
            model_name=MODEL_NAME,
            num_workers=NUM_MOCKERS,
            store_backend=store_backend,
            request_plane=request_plane,
            test_nats_interruption=interrupt_nats,
            nats_server=nats_process if interrupt_nats else None,
            durable_kv_events=durable_kv_events,
            standalone_indexer_url=mockers.standalone_indexer_url,
            standalone_indexer_b_url=mockers.standalone_indexer_b_url,
            test_zmq_replay=True,
        )

        logger.info("Indexers sync test completed successfully")
@pytest.mark.timeout(120) # bumped for xdist contention (was 42s; ~13.80s serial avg)
@pytest.mark.parametrize(
"durable_kv_events", [False], ids=["nondurable"], indirect=True
......@@ -974,9 +630,8 @@ def test_query_instance_id_returns_worker_and_tokens(
(True, True, False), # JetStream mode with KV events
(False, True, False), # NATS Core mode with local indexer (default)
(False, False, False), # Approximate mode (--no-kv-events) - no KV events
(False, True, True), # ZMQ mode: mocker → ZMQ PUB → relay → NATS
],
ids=["jetstream", "nats_core", "no_kv_events", "zmq"],
ids=["jetstream", "nats_core", "no_kv_events"],
indirect=["durable_kv_events"],
)
def test_router_decisions(
......@@ -1016,7 +671,6 @@ def test_router_decisions(
num_mockers=2,
request_plane=request_plane,
zmq_kv_events=zmq_kv_events,
standalone_indexer=zmq_kv_events,
model_name=MODEL_NAME,
) as mockers:
logger.info(f"All mockers using endpoint: {mockers.endpoint}")
......@@ -1035,7 +689,6 @@ def test_router_decisions(
test_dp_rank=True,
use_kv_events=use_kv_events,
durable_kv_events=durable_kv_events,
standalone_indexer_url=mockers.standalone_indexer_url,
)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment