fix: multimodal worker internal-only to prevent registration collision (#5986)

Signed-off-by: Krishnan Prashanth <kprashanth@nvidia.com>

fix: multimodal worker internal-only to prevent registration collision (#5986)
Signed-off-by: Krishnan Prashanth <kprashanth@nvidia.com>
6b70d845 · KrishnanPrash · GitHub · cfba042e · 6b70d845
Unverified Commit 6b70d845 authored Feb 05, 2026 by KrishnanPrash Committed by GitHub Feb 05, 2026
Hide whitespace changes
Inline Side-by-side

Showing with 14 additions and 28 deletions

components/src/dynamo/sglang/main.py components/src/dynamo/sglang/main.py +14 -28

No files found.
--- a/components/src/dynamo/sglang/main.py
+++ b/components/src/dynamo/sglang/main.py
@@ -622,7 +622,12 @@ async def init_multimodal_encode_worker(runtime: DistributedRuntime, config: Con


 async def init_multimodal_worker(runtime: DistributedRuntime, config: Config):
-    """Initialize multimodal worker component for aggregated or decode mode"""
+    """Initialize multimodal worker component.
+
+    This worker is always an internal component that should not register with
+    the Frontend. Public registration is handled by the Processor component
+    (--multimodal-processor). For standalone serving, use init() (default).
+    """
    server_args, dynamo_args = config.server_args, config.dynamo_args

    component = runtime.namespace(dynamo_args.namespace).component(
@@ -648,35 +653,16 @@ async def init_multimodal_worker(runtime: DistributedRuntime, config: Config):
    await handler.async_init()

    health_check_payload = SglangHealthCheckPayload(engine).to_dict()
-    ready_event = asyncio.Event()

    try:
-        if config.serving_mode == DisaggregationMode.DECODE:
-            # Decode Worker is an internal component, should not register with Frontend
-            # Only needs to provide internal service endpoint for Processor to call
-            await generate_endpoint.serve_endpoint(
-                handler.generate,
-                metrics_labels=[("model", server_args.served_model_name)],
-                graceful_shutdown=True,
-                health_check_payload=health_check_payload,
-            )
-        else:
-            # In aggregated mode, need to register with Frontend
-            await asyncio.gather(
-                generate_endpoint.serve_endpoint(
-                    handler.generate,
-                    metrics_labels=[("model", server_args.served_model_name)],
-                    graceful_shutdown=True,
-                    health_check_payload=health_check_payload,
-                ),
-                register_llm_with_readiness_gate(
-                    engine,
-                    generate_endpoint,
-                    server_args,
-                    dynamo_args,
-                    readiness_gate=ready_event,
-                ),
-            )
+        # Multimodal Worker is an internal component, should not register with Frontend.
+        # Only needs to provide internal service endpoint for Processor to call.
+        await generate_endpoint.serve_endpoint(
+            handler.generate,
+            metrics_labels=[("model", server_args.served_model_name)],
+            graceful_shutdown=True,
+            health_check_payload=health_check_payload,
+        )
    except Exception as e:
        logging.error(f"Failed to serve endpoints: {e}")
        raise