Unverified commit b6596c52, authored by Yan Ru Pei, committed by GitHub
Browse files

chore(kv-router): remove native kv-indexer binary, use maturin-built one (#7338)


Signed-off-by: PeaBrane <yanrpei@gmail.com>
parent 9e9ca3e2
...@@ -15,7 +15,7 @@ use serde::{Deserialize, Serialize}; ...@@ -15,7 +15,7 @@ use serde::{Deserialize, Serialize};
use crate::protocols::{LocalBlockHash, WorkerId, compute_block_hash_for_seq}; use crate::protocols::{LocalBlockHash, WorkerId, compute_block_hash_for_seq};
use super::registry::{IndexerKey, WorkerRegistry}; use super::registry::{IndexerKey, ListenerControlError, WorkerRegistry};
pub struct AppState { pub struct AppState {
pub registry: Arc<WorkerRegistry>, pub registry: Arc<WorkerRegistry>,
...@@ -51,12 +51,6 @@ pub struct UnregisterRequest { ...@@ -51,12 +51,6 @@ pub struct UnregisterRequest {
pub dp_rank: Option<u32>, pub dp_rank: Option<u32>,
} }
/// JSON shape of one entry in the `list_workers` response: a worker instance
/// together with the endpoints the registry reports for it.
#[derive(Serialize)]
struct WorkerInfo {
/// Identifier of the worker instance, as returned by the registry listing.
instance_id: WorkerId,
/// Endpoint strings keyed by a `u32`.
/// NOTE(review): the key is presumably the worker's dp_rank — confirm against
/// `WorkerRegistry::list`.
endpoints: HashMap<u32, String>,
}
#[derive(Deserialize)] #[derive(Deserialize)]
pub struct QueryRequest { pub struct QueryRequest {
pub token_ids: Vec<u32>, pub token_ids: Vec<u32>,
...@@ -86,6 +80,15 @@ async fn register( ...@@ -86,6 +80,15 @@ async fn register(
State(state): State<Arc<AppState>>, State(state): State<Arc<AppState>>,
Json(req): Json<RegisterRequest>, Json(req): Json<RegisterRequest>,
) -> impl IntoResponse { ) -> impl IntoResponse {
if let Err(error) =
super::validate_listener_endpoints(&req.endpoint, req.replay_endpoint.as_deref())
{
return (
StatusCode::BAD_REQUEST,
Json(serde_json::json!({"error": error.to_string()})),
);
}
match state match state
.registry .registry
.register( .register(
...@@ -146,16 +149,7 @@ async fn unregister( ...@@ -146,16 +149,7 @@ async fn unregister(
} }
async fn list_workers(State(state): State<Arc<AppState>>) -> impl IntoResponse { async fn list_workers(State(state): State<Arc<AppState>>) -> impl IntoResponse {
let workers: Vec<WorkerInfo> = state Json(state.registry.list())
.registry
.list()
.into_iter()
.map(|(instance_id, endpoints)| WorkerInfo {
instance_id,
endpoints,
})
.collect();
Json(workers)
} }
fn build_score_response( fn build_score_response(
...@@ -254,7 +248,6 @@ async fn query_by_hash( ...@@ -254,7 +248,6 @@ async fn query_by_hash(
} }
} }
#[cfg(feature = "test-endpoints")]
#[derive(Deserialize)] #[derive(Deserialize)]
struct ListenerControlRequest { struct ListenerControlRequest {
instance_id: WorkerId, instance_id: WorkerId,
...@@ -262,7 +255,6 @@ struct ListenerControlRequest { ...@@ -262,7 +255,6 @@ struct ListenerControlRequest {
dp_rank: Option<u32>, dp_rank: Option<u32>,
} }
#[cfg(feature = "test-endpoints")]
async fn test_pause_listener( async fn test_pause_listener(
State(state): State<Arc<AppState>>, State(state): State<Arc<AppState>>,
Json(req): Json<ListenerControlRequest>, Json(req): Json<ListenerControlRequest>,
...@@ -272,14 +264,10 @@ async fn test_pause_listener( ...@@ -272,14 +264,10 @@ async fn test_pause_listener(
.pause_listener(req.instance_id, req.dp_rank.unwrap_or(0)) .pause_listener(req.instance_id, req.dp_rank.unwrap_or(0))
{ {
Ok(()) => (StatusCode::OK, Json(serde_json::json!({"status": "ok"}))), Ok(()) => (StatusCode::OK, Json(serde_json::json!({"status": "ok"}))),
Err(e) => ( Err(error) => listener_control_error_response(error),
StatusCode::NOT_FOUND,
Json(serde_json::json!({"error": e.to_string()})),
),
} }
} }
#[cfg(feature = "test-endpoints")]
async fn test_resume_listener( async fn test_resume_listener(
State(state): State<Arc<AppState>>, State(state): State<Arc<AppState>>,
Json(req): Json<ListenerControlRequest>, Json(req): Json<ListenerControlRequest>,
...@@ -290,13 +278,26 @@ async fn test_resume_listener( ...@@ -290,13 +278,26 @@ async fn test_resume_listener(
.await .await
{ {
Ok(()) => (StatusCode::OK, Json(serde_json::json!({"status": "ok"}))), Ok(()) => (StatusCode::OK, Json(serde_json::json!({"status": "ok"}))),
Err(e) => ( Err(error) => listener_control_error_response(error),
StatusCode::CONFLICT,
Json(serde_json::json!({"error": e.to_string()})),
),
} }
} }
fn listener_control_error_response(
error: ListenerControlError,
) -> (StatusCode, Json<serde_json::Value>) {
let status = match &error {
ListenerControlError::WorkerNotFound { .. }
| ListenerControlError::ListenerNotFound { .. } => StatusCode::NOT_FOUND,
ListenerControlError::DiscoveryManaged { .. }
| ListenerControlError::InvalidPauseState { .. }
| ListenerControlError::InvalidResumeState { .. } => StatusCode::CONFLICT,
};
(
status,
Json(serde_json::json!({"error": error.to_string()})),
)
}
#[derive(Deserialize)] #[derive(Deserialize)]
struct PeerRequest { struct PeerRequest {
url: String, url: String,
...@@ -373,6 +374,7 @@ async fn handle_health() -> StatusCode { ...@@ -373,6 +374,7 @@ async fn handle_health() -> StatusCode {
#[cfg(feature = "metrics")] #[cfg(feature = "metrics")]
async fn handle_metrics(State(state): State<Arc<AppState>>) -> impl IntoResponse { async fn handle_metrics(State(state): State<Arc<AppState>>) -> impl IntoResponse {
state.registry.refresh_metrics();
let encoder = prometheus::TextEncoder::new(); let encoder = prometheus::TextEncoder::new();
let mut buf = Vec::new(); let mut buf = Vec::new();
encoder encoder
...@@ -401,12 +403,10 @@ pub fn create_router(state: Arc<AppState>) -> Router { ...@@ -401,12 +403,10 @@ pub fn create_router(state: Arc<AppState>) -> Router {
.route("/peers", get(list_peers)) .route("/peers", get(list_peers))
.route("/health", get(handle_health)); .route("/health", get(handle_health));
#[cfg(feature = "test-endpoints")]
let router = router let router = router
.route("/test/pause_listener", post(test_pause_listener)) .route("/test/pause_listener", post(test_pause_listener))
.route("/test/resume_listener", post(test_resume_listener)); .route("/test/resume_listener", post(test_resume_listener))
.with_state(state.clone());
let router = router.with_state(state.clone());
#[cfg(feature = "metrics")] #[cfg(feature = "metrics")]
let router = { let router = {
......
...@@ -47,7 +47,7 @@ required-features = ["kv-router-stress"] ...@@ -47,7 +47,7 @@ required-features = ["kv-router-stress"]
# repo # repo
dynamo-runtime = { workspace = true } dynamo-runtime = { workspace = true }
dynamo-tokens = { workspace = true } dynamo-tokens = { workspace = true }
dynamo-kv-router = { workspace = true, features = ["metrics"] } dynamo-kv-router = { workspace = true, features = ["metrics", "runtime-protocols"] }
dynamo-memory = { workspace = true } dynamo-memory = { workspace = true }
dynamo-mocker = { workspace = true } dynamo-mocker = { workspace = true }
...@@ -201,4 +201,3 @@ tonic-build = { version = "0.13.1" } ...@@ -201,4 +201,3 @@ tonic-build = { version = "0.13.1" }
name = "bench_local_transfer_v2" name = "bench_local_transfer_v2"
path = "bin/bench_local_transfer_v2.rs" path = "bin/bench_local_transfer_v2.rs"
required-features = ["block-manager-bench"] required-features = ["block-manager-bench"]
...@@ -13,12 +13,10 @@ ...@@ -13,12 +13,10 @@
# endpoint_path (e.g., "generate") as the routing key, causing handler collisions when multiple # endpoint_path (e.g., "generate") as the routing key, causing handler collisions when multiple
# workers register the same endpoint. This is a test-only limitation; production deployments # workers register the same endpoint. This is a test-only limitation; production deployments
# with separate processes per worker work correctly with TCP. # with separate processes per worker work correctly with TCP.
import asyncio
import logging import logging
import os import os
from typing import Any, Dict, Optional from typing import Any, Dict, Optional
import aiohttp
import pytest import pytest
from tests.router.common import ( from tests.router.common import (
...@@ -27,7 +25,6 @@ from tests.router.common import ( ...@@ -27,7 +25,6 @@ from tests.router.common import (
_test_router_basic, _test_router_basic,
_test_router_decisions, _test_router_decisions,
_test_router_decisions_disagg, _test_router_decisions_disagg,
_test_router_indexers_sync,
_test_router_overload_503, _test_router_overload_503,
_test_router_query_instance_id, _test_router_query_instance_id,
_test_router_two_routers, _test_router_two_routers,
...@@ -176,12 +173,7 @@ def _build_mocker_command( ...@@ -176,12 +173,7 @@ def _build_mocker_command(
class MockerProcess: class MockerProcess:
"""Manages mocker engine instances with shared tokio runtime via --num-workers. """Manages mocker engine instances with shared tokio runtime via --num-workers."""
When standalone_indexer=True, launches mockers one-by-one (each as --num-workers 1)
and runs a standalone HTTP KV indexer binary alongside them. Call launch_mockers_with_indexer()
in async context to start mockers and register their ZMQ ports with the indexer.
"""
def __init__( def __init__(
self, self,
...@@ -191,7 +183,6 @@ class MockerProcess: ...@@ -191,7 +183,6 @@ class MockerProcess:
store_backend: str = "etcd", store_backend: str = "etcd",
request_plane: str = "nats", request_plane: str = "nats",
zmq_kv_events: bool = False, zmq_kv_events: bool = False,
standalone_indexer: bool = False,
model_name: str = "mocker", model_name: str = "mocker",
zmq_replay: bool = False, zmq_replay: bool = False,
): ):
...@@ -203,12 +194,6 @@ class MockerProcess: ...@@ -203,12 +194,6 @@ class MockerProcess:
self.num_workers = num_mockers self.num_workers = num_mockers
self._zmq_kv_events_ports: list[int] = [] self._zmq_kv_events_ports: list[int] = []
self._zmq_replay_ports: list[int] = [] self._zmq_replay_ports: list[int] = []
self._standalone_indexer = standalone_indexer
self._standalone_indexer_port: Optional[int] = None
self._standalone_indexer_b_port: Optional[int] = None
self._indexer_process: Optional[ManagedProcess] = None
self._indexer_b_process: Optional[ManagedProcess] = None
self._mocker_processes: list[ManagedProcess] = []
self._request = request self._request = request
self._store_backend = store_backend self._store_backend = store_backend
self._request_plane = request_plane self._request_plane = request_plane
...@@ -231,7 +216,6 @@ class MockerProcess: ...@@ -231,7 +216,6 @@ class MockerProcess:
num_mockers * dp_size, BASE_PORT_ZMQ num_mockers * dp_size, BASE_PORT_ZMQ
) )
bases = [self._zmq_kv_events_ports[i * dp_size] for i in range(num_mockers)] bases = [self._zmq_kv_events_ports[i * dp_size] for i in range(num_mockers)]
if not standalone_indexer:
mocker_args["zmq_kv_events_ports"] = ",".join(str(p) for p in bases) mocker_args["zmq_kv_events_ports"] = ",".join(str(p) for p in bases)
logger.info( logger.info(
f"Allocated ZMQ KV event ports {self._zmq_kv_events_ports} " f"Allocated ZMQ KV event ports {self._zmq_kv_events_ports} "
...@@ -247,22 +231,12 @@ class MockerProcess: ...@@ -247,22 +231,12 @@ class MockerProcess:
replay_bases = [ replay_bases = [
self._zmq_replay_ports[i * dp_size] for i in range(num_mockers) self._zmq_replay_ports[i * dp_size] for i in range(num_mockers)
] ]
if not standalone_indexer:
mocker_args["zmq_replay_ports"] = ",".join(str(p) for p in replay_bases) mocker_args["zmq_replay_ports"] = ",".join(str(p) for p in replay_bases)
logger.info( logger.info(
f"Allocated ZMQ replay ports {self._zmq_replay_ports} " f"Allocated ZMQ replay ports {self._zmq_replay_ports} "
f"(bases: {replay_bases}) for {num_mockers} workers" f"(bases: {replay_bases}) for {num_mockers} workers"
) )
if standalone_indexer:
# Allocate ports for standalone indexer A and B (P2P recovery peer)
indexer_ports = allocate_ports(2, BASE_PORT)
self._standalone_indexer_port = indexer_ports[0]
self._standalone_indexer_b_port = indexer_ports[1]
request.addfinalizer(lambda: deallocate_ports(indexer_ports))
# Don't build a single mocker command — we'll launch per-mocker in launch_mockers_with_indexer
self._process = None
else:
command = _build_mocker_command( command = _build_mocker_command(
endpoint=self.endpoint, endpoint=self.endpoint,
store_backend=store_backend, store_backend=store_backend,
...@@ -285,247 +259,15 @@ class MockerProcess: ...@@ -285,247 +259,15 @@ class MockerProcess:
) )
logger.info( logger.info(
f"Created mocker process with {num_mockers} worker(s), endpoint: {self.endpoint}" f"Created mocker process with {num_mockers} worker(s), endpoint: {self.endpoint}"
f"{', standalone_indexer=True' if standalone_indexer else ''}"
) )
@property
def standalone_indexer_url(self) -> Optional[str]:
if self._standalone_indexer_port is not None:
return f"http://localhost:{self._standalone_indexer_port}"
return None
@property
def standalone_indexer_b_url(self) -> Optional[str]:
    """HTTP base URL of standalone indexer B on localhost.

    Returns None when `_standalone_indexer_b_port` is None.
    """
    port = self._standalone_indexer_b_port
    return None if port is None else f"http://localhost:{port}"
def __enter__(self): def __enter__(self):
if self._standalone_indexer:
# Launch the standalone indexer binary
block_size = self._mocker_args_orig.get("block_size", BLOCK_SIZE)
indexer_cmd = [
"cargo",
"run",
"-p",
"dynamo-kv-router",
"--features",
"indexer-bin,test-endpoints",
"--bin",
"dynamo-kv-indexer",
"--",
"--block-size",
str(block_size),
"--port",
str(self._standalone_indexer_port),
]
self._indexer_process = ManagedProcess(
command=indexer_cmd,
timeout=120,
display_output=True,
health_check_ports=[self._standalone_indexer_port],
health_check_urls=[],
log_dir=self._request.node.name,
terminate_all_matching_process_names=False,
display_name="dynamo-kv-indexer",
)
logger.info(
f"Starting standalone indexer on port {self._standalone_indexer_port}"
)
self._indexer_process.__enter__()
# Don't start mocker processes yet — launch_mockers_with_indexer will do it
else:
logger.info(f"Starting mocker process with {self.num_workers} worker(s)") logger.info(f"Starting mocker process with {self.num_workers} worker(s)")
self._process.__enter__() self._process.__enter__()
return self return self
async def launch_mockers_with_indexer(self, endpoint):
    """Launch mockers one-by-one and register each with the standalone indexer.

    For each mocker:
    1. Launch a mocker process with --num-workers 1
    2. Poll endpoint.client().instance_ids() until a new worker_id appears
    3. POST /register to the indexer once per dp_rank, sending the worker_id,
       that rank's ZMQ event endpoint and (when replay ports were allocated)
       its replay endpoint

    Args:
        endpoint: The dynamo endpoint object to discover worker IDs.

    Raises:
        RuntimeError: If no new worker_id shows up after 120 polls spaced
            0.5 s apart, or if the indexer answers a registration with a
            status other than 201.
    """
    client = await endpoint.client()
    known_ids: set[int] = set()

    # Loop-invariant values, resolved once instead of on every iteration.
    dp_size = self._mocker_args_orig.get("dp_size", 1)
    block_size = self._mocker_args_orig.get("block_size", BLOCK_SIZE)
    register_url = f"{self.standalone_indexer_url}/register"

    for i in range(self.num_workers):
        # Build per-mocker args with its own ZMQ base port
        mocker_args = self._mocker_args_orig.copy()
        base_port = self._zmq_kv_events_ports[i * dp_size]
        mocker_args["zmq_kv_events_ports"] = str(base_port)
        if self._zmq_replay_ports:
            replay_base = self._zmq_replay_ports[i * dp_size]
            mocker_args["zmq_replay_ports"] = str(replay_base)

        command = _build_mocker_command(
            endpoint=self.endpoint,
            store_backend=self._store_backend,
            num_workers=1,
            mocker_args=mocker_args,
        )

        env = os.environ.copy()
        env["DYN_REQUEST_PLANE"] = self._request_plane

        proc = ManagedProcess(
            command=command,
            env=env,
            timeout=60,
            display_output=True,
            health_check_ports=[],
            health_check_urls=[],
            log_dir=self._request.node.name,
            terminate_all_matching_process_names=False,
            display_name=f"mocker-{i}",
        )
        proc.__enter__()
        # Track the process right away so __exit__ stops it even if the
        # discovery or registration steps below raise.
        self._mocker_processes.append(proc)

        # Poll for the new worker_id
        new_worker_id = None
        for _ in range(120):
            ids = set(client.instance_ids())
            new = ids - known_ids
            if new:
                new_worker_id = new.pop()
                known_ids.add(new_worker_id)
                break
            await asyncio.sleep(0.5)

        if new_worker_id is None:
            raise RuntimeError(
                f"Timed out waiting for mocker {i} to register "
                f"(known_ids={known_ids})"
            )

        # Register each dp_rank endpoint with the standalone indexer
        zmq_addresses = {}
        async with aiohttp.ClientSession() as session:
            for dp_rank in range(dp_size):
                port = self._zmq_kv_events_ports[i * dp_size + dp_rank]
                # Named zmq_endpoint so the `endpoint` parameter (the dynamo
                # endpoint object) is not rebound to a string.
                zmq_endpoint = f"tcp://127.0.0.1:{port}"
                zmq_addresses[str(dp_rank)] = zmq_endpoint
                payload = {
                    "instance_id": new_worker_id,
                    "endpoint": zmq_endpoint,
                    "dp_rank": dp_rank,
                    "model_name": self.model_name,
                    "block_size": block_size,
                }
                if self._zmq_replay_ports:
                    replay_port = self._zmq_replay_ports[i * dp_size + dp_rank]
                    payload["replay_endpoint"] = f"tcp://127.0.0.1:{replay_port}"
                async with session.post(register_url, json=payload) as resp:
                    if resp.status != 201:
                        body = await resp.text()
                        raise RuntimeError(
                            f"Failed to register instance {new_worker_id} "
                            f"dp_rank {dp_rank}: {resp.status} {body}"
                        )

        self.worker_id_to_zmq_ports[new_worker_id] = zmq_addresses
        logger.info(
            f"Mocker {i}: worker_id={new_worker_id}, "
            f"zmq_addresses={zmq_addresses}"
        )

    logger.info(
        f"All {self.num_workers} mockers launched and registered with indexer"
    )
def launch_indexer(self):
    """Start standalone indexer B, peered with indexer A via --peers.

    All registered workers are handed over on the command line (--workers),
    so the ZMQ sockets connect before recovery runs. The subscription
    handshake can then finish during the recovery delay, and no events are
    dropped to the ZMQ slow-joiner problem.

    Raises:
        RuntimeError: If standalone-indexer mode is off or no port was
            reserved for indexer B, or if no workers are registered yet.
    """
    standalone_ready = (
        self._standalone_indexer and self._standalone_indexer_b_port is not None
    )
    if not standalone_ready:
        raise RuntimeError("launch_indexer requires standalone_indexer=True")
    if not self.worker_id_to_zmq_ports:
        raise RuntimeError("launch_indexer requires workers to be registered first")

    block_size = self._mocker_args_orig.get("block_size", BLOCK_SIZE)

    # --workers value: comma-separated "worker_id:dp_rank=zmq_addr" entries.
    workers_arg = ",".join(
        f"{wid}:{rank}={addr}"
        for wid, by_rank in self.worker_id_to_zmq_ports.items()
        for rank, addr in by_rank.items()
    )

    cargo_prefix = [
        "cargo",
        "run",
        "-p",
        "dynamo-kv-router",
        "--features",
        "indexer-bin,test-endpoints",
        "--bin",
        "dynamo-kv-indexer",
        "--",
    ]
    indexer_args = [
        "--block-size",
        str(block_size),
        "--port",
        str(self._standalone_indexer_b_port),
        "--peers",
        f"http://localhost:{self._standalone_indexer_port}",
        "--workers",
        workers_arg,
        "--model-name",
        self.model_name,
    ]

    # Store the handle before starting it so __exit__ can stop the process.
    self._indexer_b_process = ManagedProcess(
        command=cargo_prefix + indexer_args,
        timeout=120,
        display_output=True,
        health_check_ports=[self._standalone_indexer_b_port],
        health_check_urls=[],
        log_dir=self._request.node.name,
        terminate_all_matching_process_names=False,
        display_name="dynamo-kv-indexer-b",
    )
    logger.info(
        f"Starting standalone indexer B on port {self._standalone_indexer_b_port} "
        f"with peer http://localhost:{self._standalone_indexer_port}"
    )
    self._indexer_b_process.__enter__()
def __exit__(self, exc_type, exc_val, exc_tb): def __exit__(self, exc_type, exc_val, exc_tb):
logger.info("Stopping mocker process(es)") logger.info("Stopping mocker process")
# Stop individual mocker processes (standalone_indexer mode)
for proc in self._mocker_processes:
try:
proc.__exit__(exc_type, exc_val, exc_tb)
except Exception as e:
logger.warning(f"Error stopping mocker process: {e}")
self._mocker_processes.clear()
# Stop standalone indexer B (P2P recovery peer)
if self._indexer_b_process is not None:
try:
self._indexer_b_process.__exit__(exc_type, exc_val, exc_tb)
except Exception as e:
logger.warning(f"Error stopping indexer B process: {e}")
self._indexer_b_process = None
# Stop standalone indexer A
if self._indexer_process is not None:
try:
self._indexer_process.__exit__(exc_type, exc_val, exc_tb)
except Exception as e:
logger.warning(f"Error stopping indexer process: {e}")
self._indexer_process = None
# Stop single mocker process (non-standalone mode)
if self._process is not None: if self._process is not None:
self._process.__exit__(exc_type, exc_val, exc_tb) self._process.__exit__(exc_type, exc_val, exc_tb)
if self._zmq_kv_events_ports: if self._zmq_kv_events_ports:
...@@ -844,92 +586,6 @@ def test_kv_router_bindings( ...@@ -844,92 +586,6 @@ def test_kv_router_bindings(
) )
@pytest.mark.parametrize(
    "store_backend,durable_kv_events,request_plane",
    [
        # JetStream mode - uses JetStream
        pytest.param("etcd", True, "nats", id="jetstream"),
        # NATS core mode (with gap detection) - no JetStream
        pytest.param("etcd", False, "tcp", id="nats_core"),
        # File backend - uses JetStream
        pytest.param("file", True, "nats", id="file"),
    ],
    indirect=["request_plane", "durable_kv_events"],
)
@pytest.mark.timeout(300)
def test_indexers_sync(
    request,
    runtime_services_dynamic_ports,
    predownload_tokenizers,
    file_storage_backend,
    store_backend,
    durable_kv_events,
    request_plane,
):
    """
    Check that two KV routers end up with the same indexer state after serving requests.

    Parametrized over three setups:
    - jetstream: etcd backend, JetStream for KV events, NATS request plane
    - nats_core: etcd backend, NATS Core with gap detection, TCP request plane
    - file: file backend, JetStream for KV events, NATS request plane

    Mockers run with ZMQ KV events, ZMQ replay and a standalone indexer. The
    NATS interruption scenario is exercised only when durable_kv_events is False.
    """
    logger.info(
        f"Starting indexers sync test: store_backend={store_backend}, "
        f"durable_kv_events={durable_kv_events}, request_plane={request_plane}"
    )

    # The dynamic-port fixture avoids hardcoded localhost:4222/2379 in parallel runs.
    nats_process, _etcd_process = runtime_services_dynamic_ports

    # The NATS server handle is passed on only for the interruption scenario.
    interrupt_nats = not durable_kv_events
    nats_server = nats_process if interrupt_nats else None

    # dp_size=2 exercises per-dp_rank event ID tracking and recovery.
    mocker_args = dict(
        speedup_ratio=SPEEDUP_RATIO,
        block_size=BLOCK_SIZE,
        durable_kv_events=durable_kv_events,
        dp_size=2,
    )

    with MockerProcess(
        request,
        mocker_args=mocker_args,
        num_mockers=NUM_MOCKERS,
        store_backend=store_backend,
        request_plane=request_plane,
        zmq_kv_events=True,
        zmq_replay=True,
        standalone_indexer=True,
        model_name=MODEL_NAME,
    ) as mockers:
        # 2 workers x 2 DP ranks = 4 independent event streams
        logger.info(f"Starting {NUM_MOCKERS} mocker instances with dp_size=2")
        logger.info(f"All mockers using endpoint: {mockers.endpoint}")

        # The shared implementation creates its own runtime per router and does
        # the consumer verification while the routers are still alive.
        _test_router_indexers_sync(
            engine_workers=mockers,
            block_size=BLOCK_SIZE,
            model_name=MODEL_NAME,
            num_workers=NUM_MOCKERS,
            store_backend=store_backend,
            request_plane=request_plane,
            test_nats_interruption=interrupt_nats,
            nats_server=nats_server,
            durable_kv_events=durable_kv_events,
            standalone_indexer_url=mockers.standalone_indexer_url,
            standalone_indexer_b_url=mockers.standalone_indexer_b_url,
            test_zmq_replay=True,
        )

        logger.info("Indexers sync test completed successfully")
@pytest.mark.timeout(120) # bumped for xdist contention (was 42s; ~13.80s serial avg) @pytest.mark.timeout(120) # bumped for xdist contention (was 42s; ~13.80s serial avg)
@pytest.mark.parametrize( @pytest.mark.parametrize(
"durable_kv_events", [False], ids=["nondurable"], indirect=True "durable_kv_events", [False], ids=["nondurable"], indirect=True
...@@ -974,9 +630,8 @@ def test_query_instance_id_returns_worker_and_tokens( ...@@ -974,9 +630,8 @@ def test_query_instance_id_returns_worker_and_tokens(
(True, True, False), # JetStream mode with KV events (True, True, False), # JetStream mode with KV events
(False, True, False), # NATS Core mode with local indexer (default) (False, True, False), # NATS Core mode with local indexer (default)
(False, False, False), # Approximate mode (--no-kv-events) - no KV events (False, False, False), # Approximate mode (--no-kv-events) - no KV events
(False, True, True), # ZMQ mode: mocker → ZMQ PUB → relay → NATS
], ],
ids=["jetstream", "nats_core", "no_kv_events", "zmq"], ids=["jetstream", "nats_core", "no_kv_events"],
indirect=["durable_kv_events"], indirect=["durable_kv_events"],
) )
def test_router_decisions( def test_router_decisions(
...@@ -1016,7 +671,6 @@ def test_router_decisions( ...@@ -1016,7 +671,6 @@ def test_router_decisions(
num_mockers=2, num_mockers=2,
request_plane=request_plane, request_plane=request_plane,
zmq_kv_events=zmq_kv_events, zmq_kv_events=zmq_kv_events,
standalone_indexer=zmq_kv_events,
model_name=MODEL_NAME, model_name=MODEL_NAME,
) as mockers: ) as mockers:
logger.info(f"All mockers using endpoint: {mockers.endpoint}") logger.info(f"All mockers using endpoint: {mockers.endpoint}")
...@@ -1035,7 +689,6 @@ def test_router_decisions( ...@@ -1035,7 +689,6 @@ def test_router_decisions(
test_dp_rank=True, test_dp_rank=True,
use_kv_events=use_kv_events, use_kv_events=use_kv_events,
durable_kv_events=durable_kv_events, durable_kv_events=durable_kv_events,
standalone_indexer_url=mockers.standalone_indexer_url,
) )
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment