Commit 0292feb5 authored by jon-chuang, committed by GitHub

feat: Extract Common Configs + Log Configs on Init + Add `test_` to `sdk/tests` filenames required for pytest (#434)
Co-authored-by: ishandhanani <82981111+ishandhanani@users.noreply.github.com>
parent 0186aa7b
......@@ -217,17 +217,17 @@ def build_serve_command() -> click.Group:
# Initialize service_configs as empty dict if it's None
# Convert nested YAML structure to flat dict with dot notation
for service, configs in yaml_configs.items():
for key, value in configs.items():
if service not in service_configs:
service_configs[service] = {}
for key, value in configs.items():
service_configs[service][key] = value
# Process service-specific options
cmdline_overrides: t.Dict[str, t.Any] = _parse_service_args(ctx.args)
for service, configs in cmdline_overrides.items():
for key, value in configs.items():
if service not in service_configs:
service_configs[service] = {}
for key, value in configs.items():
service_configs[service][key] = value
# Process depends
......@@ -243,11 +243,12 @@ def build_serve_command() -> click.Group:
rich.print(f"DYNAMO_SERVICE_CONFIG={json.dumps(service_configs)}")
sys.exit(0)
configure_server_logging()
# Set environment variable with service configuration
if service_configs:
logger.info(f"Running dynamo serve with service configs {service_configs}")
os.environ["DYNAMO_SERVICE_CONFIG"] = json.dumps(service_configs)
configure_server_logging()
if working_dir is None:
if os.path.isdir(os.path.expanduser(bento)):
working_dir = os.path.expanduser(bento)
......
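The hunk above flattens the nested YAML structure into per-service dicts and then layers command-line overrides on top, so CLI flags win per key. A minimal sketch of that merge order, using hypothetical service and key names:

```python
# Sketch of the merge performed in build_serve_command (hypothetical data):
# YAML configs are applied first, then CLI overrides replace matching keys.
yaml_configs = {"Worker": {"model": "my-model", "router": "round-robin"}}
cmdline_overrides = {"Worker": {"router": "random"}}

service_configs: dict = {}
for source in (yaml_configs, cmdline_overrides):
    for service, configs in source.items():
        service_configs.setdefault(service, {}).update(configs)

assert service_configs == {"Worker": {"model": "my-model", "router": "random"}}
```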
......@@ -14,8 +14,11 @@
# limitations under the License.
import json
import logging
import os
logger = logging.getLogger(__name__)
class ServiceConfig(dict):
"""Configuration store that inherits from dict for simpler access patterns"""
......@@ -47,14 +50,27 @@ class ServiceConfig(dict):
return self[service_name][key]
def as_args(self, service_name, prefix=""):
"""Extract configs as CLI args for a service, with optional prefix filtering"""
"""Extract configs as CLI args for a service, with optional prefix filtering.
Every component additionally receives the args from the `Common` config
for each key it has subscribed to, i.e. each key listed in the component's
`common-configs` setting that is not overridden by the component's own
config.
"""
COMMON_CONFIG_SERVICE = "Common"
COMMON_CONFIG_KEY = "common-configs"
if service_name not in self:
return []
args = []
for key, value in self[service_name].items():
args: list[str] = []
def add_to_args(args: list[str], key: str, value):
if prefix and not key.startswith(prefix):
continue
return
if key.endswith(COMMON_CONFIG_KEY):
return
# Strip prefix if needed
arg_key = key[len(prefix) :] if prefix and key.startswith(prefix) else key
......@@ -68,4 +84,16 @@ class ServiceConfig(dict):
else:
args.extend([f"--{arg_key}", str(value)])
if (common := self.get(COMMON_CONFIG_SERVICE)) is not None and (
common_config_keys := self[service_name].get(COMMON_CONFIG_KEY)
) is not None:
for key in common_config_keys:
if key in common and key not in self[service_name]:
add_to_args(args, key, common[key])
for key, value in self[service_name].items():
add_to_args(args, key, value)
logger.info(f"Running {service_name} with {args=}")
return args
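The subscription logic above means a component inherits only the `Common` keys it lists under `common-configs`, its own values win on conflict, and the `common-configs` entry itself is never emitted as a flag. A minimal sketch with a hypothetical `Worker` component, mirroring the tests added below:

```python
# Hypothetical component and values; the singleton reset follows the same
# pattern as the tests in this commit.
import json
import os

from dynamo.sdk.lib.config import ServiceConfig

os.environ["DYNAMO_SERVICE_CONFIG"] = json.dumps(
    {
        "Common": {"model": "my-model", "block-size": 64},
        "Worker": {"block-size": 128, "common-configs": ["model", "block-size"]},
    }
)
ServiceConfig._instance = None
args = ServiceConfig.get_instance().as_args("Worker")
# "model" is inherited from Common; the component's own "block-size"
# overrides the common value of 64; "common-configs" is never emitted.
assert args == ["--model", "my-model", "--block-size", "128"]
```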
......@@ -86,15 +86,15 @@ class Backend2:
backend = depends(Backend)
def __init__(self) -> None:
print("Starting middle2")
print("Starting backend2")
@dynamo_endpoint()
async def generate(self, req: RequestType):
"""Forward requests to backend."""
req_text = req.text
print(f"Middle2 received: {req_text}")
text = f"{req_text}-mid2"
print(f"Backend2 received: {req_text}")
text = f"{req_text}-back2"
next_request = RequestType(text=text).model_dump_json()
print(next_request)
......@@ -117,8 +117,17 @@ class Middle:
req_text = req.text
print(f"Middle received: {req_text}")
text = f"{req_text}-mid"
for token in text.split():
yield f"Mid: {token}"
txt = RequestType(text=text)
if self.backend:
async for back_resp in self.backend.generate(txt.model_dump_json()):
print(f"Frontend received back_resp: {back_resp}")
yield f"Frontend: {back_resp}"
else:
async for back_resp in self.backend2.generate(txt.model_dump_json()):
print(f"Frontend received back_resp: {back_resp}")
yield f"Frontend: {back_resp}"
@service(resources={"cpu": "1"}, traffic={"timeout": 60})
......@@ -136,11 +145,6 @@ class Frontend:
print(f"Frontend received type: {type(text)}")
txt = RequestType(text=text)
print(f"Frontend sending: {type(txt)}")
if self.backend:
async for back_resp in self.backend.generate(txt.model_dump_json()):
print(f"Frontend received back_resp: {back_resp}")
yield f"Frontend: {back_resp}"
else:
async for mid_resp in self.middle.generate(txt.model_dump_json()):
print(f"Frontend received mid_resp: {mid_resp}")
yield f"Frontend: {mid_resp}"
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import pytest
from dynamo.sdk.lib.config import ServiceConfig
pytestmark = pytest.mark.pre_merge
def test_service_config_with_common_configs():
# Reset singleton instance
ServiceConfig._instance = None
# Set environment variable with config that includes common-configs
os.environ[
"DYNAMO_SERVICE_CONFIG"
] = """
{
"Common": {
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
"block-size": 64,
"max-model-len": 16384
},
"VllmWorker": {
"enforce-eager": true,
"common-configs": ["model", "block-size", "max-model-len"]
}
}
"""
# Get arguments and verify common configs are included
service_config = ServiceConfig.get_instance()
vllm_worker_args = service_config.as_args("VllmWorker")
# Check that each common config appears in the arguments
for key in ["model", "block-size", "max-model-len"]:
assert f"--{key}" in vllm_worker_args
def test_service_config_without_common_configs():
# Reset singleton instance
ServiceConfig._instance = None
# Set environment variable with config that DOESN'T include common-configs
os.environ[
"DYNAMO_SERVICE_CONFIG"
] = """
{
"Common": {
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
"block-size": 64,
"max-model-len": 16384
},
"VllmWorker": {
"enforce-eager": true
}
}
"""
# Get arguments and verify common configs are NOT included
service_config = ServiceConfig.get_instance()
vllm_worker_args = service_config.as_args("VllmWorker")
# Check that none of the common configs appear in arguments
for key in ["model", "block-size", "max-model-len"]:
assert f"--{key}" not in vllm_worker_args
def test_service_config_with_direct_configs():
# Reset singleton instance
ServiceConfig._instance = None
# Set environment variable with direct configs (no Common section reference)
os.environ[
"DYNAMO_SERVICE_CONFIG"
] = """
{
"VllmWorker": {
"enforce-eager": true,
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
"block-size": 64,
"max-model-len": 16384
}
}
"""
# Get arguments and verify direct configs are included
service_config = ServiceConfig.get_instance()
vllm_worker_args = service_config.as_args("VllmWorker")
# Check that each config appears in the arguments
for key in ["model", "block-size", "max-model-len"]:
assert f"--{key}" in vllm_worker_args
def test_service_config_override_common_configs():
# Reset singleton instance
ServiceConfig._instance = None
# Set environment variable with config that includes common-configs
# overridden by the subscribing config
os.environ[
"DYNAMO_SERVICE_CONFIG"
] = """
{
"Common": {
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
"block-size": 64,
"max-model-len": 16384
},
"VllmWorker": {
"enforce-eager": true,
"block-size": 128,
"common-configs": ["model", "block-size", "max-model-len"]
}
}
"""
# Get arguments and verify common configs are included
service_config = ServiceConfig.get_instance()
vllm_worker_args = service_config.as_args("VllmWorker")
# Check that each common config appears in the arguments
for key in ["model", "block-size", "max-model-len"]:
assert f"--{key}" in vllm_worker_args
assert vllm_worker_args[vllm_worker_args.index("--block-size") + 1] == "128"
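The commit title also notes that files under `sdk/tests` need a `test_` prefix for pytest to collect them. Assuming default discovery settings, a plausible way to run the suite above, selecting the `pre_merge` marker set via `pytestmark`, is:

```bash
# pytest only collects test_*.py files by default, hence the renames;
# -m filters on the pre_merge marker applied at module level above
pytest sdk/tests -m pre_merge
```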
......@@ -79,4 +79,4 @@ async def test_pipeline():
if attempt == max_retries - 1:
raise
print(f"Attempt {attempt + 1} failed, retrying...")
await asyncio.sleep(1)
await asyncio.sleep(3)
......@@ -162,36 +162,36 @@ This will print out something like
```bash
Service Configuration:
{
"Common": {
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
"block-size": 64,
"max-model-len": 16384,
},
"Frontend": {
"served_model_name": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
"endpoint": "dynamo.Processor.chat/completions",
"port": 8000
},
"Processor": {
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
"block-size": 64,
"max-model-len": 16384,
"router": "round-robin"
"router": "round-robin",
"common-configs": [model, block-size, max-model-len]
},
"VllmWorker": {
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
"enforce-eager": true,
"block-size": 64,
"max-model-len": 16384,
"max-num-batched-tokens": 16384,
"enable-prefix-caching": true,
"router": "random",
"tensor-parallel-size": 1,
"ServiceArgs": {
"workers": 1
}
},
"common-configs": [model, block-size, max-model-len]
}
}
Environment Variable that would be set:
DYNAMO_SERVICE_CONFIG={"Frontend": {"served_model_name": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "endpoint": "dynamo.Processor.chat/completions", "port": 8000}, "Processor": {"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "block-size": 64,
"max-model-len": 16384, "router": "round-robin"}, "VllmWorker": {"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "enforce-eager": true, "block-size": 64, "max-model-len": 16384, "max-num-batched-tokens": 16384, "enable-prefix-caching":
true, "router": "random", "tensor-parallel-size": 1, "ServiceArgs": {"workers": 1}}}
DYNAMO_SERVICE_CONFIG={"Common": {"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "block-size": 64, "max-model-len": 16384}, "Frontend": {"served_model_name": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "endpoint": "dynamo.Processor.chat/completions", "port": 8000}, "Processor": {"router": "round-robin", "common-configs": ["model", "block-size", "max-model-len"]}, "VllmWorker": {"enforce-eager": true, "max-num-batched-tokens": 16384, "enable-prefix-caching":
true, "router": "random", "tensor-parallel-size": 1, "ServiceArgs": {"workers": 1}, "common-configs": ["model", "block-size", "max-model-len"]}}
```
You can override any of these configuration options by passing in CLI flags to serve. For example, to change the routing strategy, you can run
......
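The concrete command is collapsed in this diff. Assuming the `--<Service>.<key>=<value>` override syntax handled by `_parse_service_args`, a hypothetical invocation might look like:

```bash
# Hypothetical example: override the Processor's routing strategy for this run
dynamo serve graphs.agg:Frontend -f ./configs/agg.yaml --Processor.router=random
```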
......@@ -43,7 +43,7 @@ def parse_args(service_name, prefix) -> Namespace:
help="Minimum number of workers required before proceeding",
)
parser.add_argument(
"--model-name",
"--model",
type=str,
default="deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
help="Model that is being served",
......
......@@ -12,6 +12,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Common:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
block-size: 64
max-model-len: 16384
Frontend:
served_model_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
......@@ -19,16 +23,11 @@ Frontend:
port: 8000
Processor:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
block-size: 64
max-model-len: 16384
router: round-robin
common-configs: [model, block-size, max-model-len]
VllmWorker:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
enforce-eager: true
block-size: 64
max-model-len: 16384
max-num-batched-tokens: 16384
enable-prefix-caching: true
router: random
......@@ -37,3 +36,4 @@ VllmWorker:
workers: 1
resources:
gpu: 1
common-configs: [model, block-size, max-model-len]
......@@ -12,6 +12,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Common:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
router: kv
block-size: 64
max-model-len: 16384
kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}'
Frontend:
served_model_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
......@@ -19,26 +25,19 @@ Frontend:
port: 8000
Processor:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
block-size: 64
max-model-len: 16384
router: kv
common-configs: [model, block-size, max-model-len, router]
Router:
model-name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
min-workers: 1
common-configs: [model]
VllmWorker:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
enforce-eager: true
kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}'
block-size: 64
max-model-len: 16384
max-num-batched-tokens: 16384
enable-prefix-caching: true
router: kv
tensor-parallel-size: 1
ServiceArgs:
workers: 1
resources:
gpu: 1
common-configs: [model, block-size, max-model-len, router, kv-transfer-config]
......@@ -12,7 +12,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Common:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
block-size: 64
max-model-len: 16384
kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}'
Frontend:
served_model_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
......@@ -20,13 +24,10 @@ Frontend:
port: 8000
Processor:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
router: round-robin
common-configs: [model, block-size]
VllmWorker:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}'
max-model-len: 16384
remote-prefill: true
conditional-disagg: true
max-local-prefill-length: 10
......@@ -35,13 +36,12 @@ VllmWorker:
workers: 1
resources:
gpu: 1
common-configs: [model, block-size, max-model-len, kv-transfer-config]
PrefillWorker:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}'
max-model-len: 16384
max-num-batched-tokens: 16384
ServiceArgs:
workers: 1
resources:
gpu: 1
common-configs: [model, block-size, max-model-len, kv-transfer-config]
......@@ -12,6 +12,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Common:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
block-size: 64
max-model-len: 16384
router: kv
kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}'
Frontend:
served_model_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
......@@ -19,41 +25,30 @@ Frontend:
port: 8000
Processor:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
block-size: 64
max-model-len: 16384
router: kv
common-configs: [model, block-size, max-model-len, router]
Router:
model-name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
min-workers: 1
common-configs: [model]
VllmWorker:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}'
block-size: 64
max-model-len: 16384
max-num-batched-tokens: 16384
remote-prefill: true
conditional-disagg: true
max-local-prefill-length: 10
max-prefill-queue-size: 2
tensor-parallel-size: 1
router: kv
enable-prefix-caching: true
ServiceArgs:
workers: 1
resources:
gpu: 1
common-configs: [model, block-size, max-model-len, router, kv-transfer-config]
# TODO - set all of these but model as default
PrefillWorker:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}'
block-size: 64
max-model-len: 16384
max-num-batched-tokens: 16384
ServiceArgs:
workers: 1
resources:
gpu: 1
common-configs: [model, block-size, max-model-len, kv-transfer-config]
......@@ -89,7 +89,7 @@ pub enum PromptContextMixin {
/// Support OAI Chat Messages and Tools
OaiChat,
/// Enables templates with `{{datatime}}` to be rendered with the current date and time.
/// Enables templates with `{{datetime}}` to be rendered with the current date and time.
Llama3DateTime,
}
......