Commit 0292feb5 authored by jon-chuang's avatar jon-chuang Committed by GitHub
Browse files

feat: Extract Common Configs + Log Configs on Init + Add `test_` to...


feat: Extract Common Configs + Log Configs on Init + Add `test_` to `sdk/tests` filenames required for pytest (#434)
Co-authored-by: default avatarishandhanani <82981111+ishandhanani@users.noreply.github.com>
parent 0186aa7b
...@@ -217,17 +217,17 @@ def build_serve_command() -> click.Group: ...@@ -217,17 +217,17 @@ def build_serve_command() -> click.Group:
# Initialize service_configs as empty dict if it's None # Initialize service_configs as empty dict if it's None
# Convert nested YAML structure to flat dict with dot notation # Convert nested YAML structure to flat dict with dot notation
for service, configs in yaml_configs.items(): for service, configs in yaml_configs.items():
for key, value in configs.items():
if service not in service_configs: if service not in service_configs:
service_configs[service] = {} service_configs[service] = {}
for key, value in configs.items():
service_configs[service][key] = value service_configs[service][key] = value
# Process service-specific options # Process service-specific options
cmdline_overrides: t.Dict[str, t.Any] = _parse_service_args(ctx.args) cmdline_overrides: t.Dict[str, t.Any] = _parse_service_args(ctx.args)
for service, configs in cmdline_overrides.items(): for service, configs in cmdline_overrides.items():
for key, value in configs.items():
if service not in service_configs: if service not in service_configs:
service_configs[service] = {} service_configs[service] = {}
for key, value in configs.items():
service_configs[service][key] = value service_configs[service][key] = value
# Process depends # Process depends
...@@ -243,11 +243,12 @@ def build_serve_command() -> click.Group: ...@@ -243,11 +243,12 @@ def build_serve_command() -> click.Group:
rich.print(f"DYNAMO_SERVICE_CONFIG={json.dumps(service_configs)}") rich.print(f"DYNAMO_SERVICE_CONFIG={json.dumps(service_configs)}")
sys.exit(0) sys.exit(0)
configure_server_logging()
# Set environment variable with service configuration # Set environment variable with service configuration
if service_configs: if service_configs:
logger.info(f"Running dynamo serve with service configs {service_configs}")
os.environ["DYNAMO_SERVICE_CONFIG"] = json.dumps(service_configs) os.environ["DYNAMO_SERVICE_CONFIG"] = json.dumps(service_configs)
configure_server_logging()
if working_dir is None: if working_dir is None:
if os.path.isdir(os.path.expanduser(bento)): if os.path.isdir(os.path.expanduser(bento)):
working_dir = os.path.expanduser(bento) working_dir = os.path.expanduser(bento)
......
...@@ -14,8 +14,11 @@ ...@@ -14,8 +14,11 @@
# limitations under the License. # limitations under the License.
import json import json
import logging
import os import os
logger = logging.getLogger(__name__)
class ServiceConfig(dict): class ServiceConfig(dict):
"""Configuration store that inherits from dict for simpler access patterns""" """Configuration store that inherits from dict for simpler access patterns"""
...@@ -47,14 +50,27 @@ class ServiceConfig(dict): ...@@ -47,14 +50,27 @@ class ServiceConfig(dict):
return self[service_name][key] return self[service_name][key]
def as_args(self, service_name, prefix=""): def as_args(self, service_name, prefix=""):
"""Extract configs as CLI args for a service, with optional prefix filtering""" """Extract configs as CLI args for a service, with optional prefix filtering.
Every component will additionally have the args in the `Common` configs
applied if it has subscribed to that config key, i.e. the given key is provided in
the component's `common-configs` setting, and that key has not been overridden by the
component's config.
"""
COMMON_CONFIG_SERVICE = "Common"
COMMON_CONFIG_KEY = "common-configs"
if service_name not in self: if service_name not in self:
return [] return []
args = [] args: list[str] = []
for key, value in self[service_name].items():
def add_to_args(args: list[str], key: str, value):
if prefix and not key.startswith(prefix): if prefix and not key.startswith(prefix):
continue return
if key.endswith(COMMON_CONFIG_KEY):
return
# Strip prefix if needed # Strip prefix if needed
arg_key = key[len(prefix) :] if prefix and key.startswith(prefix) else key arg_key = key[len(prefix) :] if prefix and key.startswith(prefix) else key
...@@ -68,4 +84,16 @@ class ServiceConfig(dict): ...@@ -68,4 +84,16 @@ class ServiceConfig(dict):
else: else:
args.extend([f"--{arg_key}", str(value)]) args.extend([f"--{arg_key}", str(value)])
if (common := self.get(COMMON_CONFIG_SERVICE)) is not None and (
common_config_keys := self[service_name].get(COMMON_CONFIG_KEY)
) is not None:
for key in common_config_keys:
if key in common and key not in self[service_name]:
add_to_args(args, key, common[key])
for key, value in self[service_name].items():
add_to_args(args, key, value)
logger.info(f"Running {service_name} with {args=}")
return args return args
...@@ -86,15 +86,15 @@ class Backend2: ...@@ -86,15 +86,15 @@ class Backend2:
backend = depends(Backend) backend = depends(Backend)
def __init__(self) -> None: def __init__(self) -> None:
print("Starting middle2") print("Starting backend2")
@dynamo_endpoint() @dynamo_endpoint()
async def generate(self, req: RequestType): async def generate(self, req: RequestType):
"""Forward requests to backend.""" """Forward requests to backend."""
req_text = req.text req_text = req.text
print(f"Middle2 received: {req_text}") print(f"Backend2 received: {req_text}")
text = f"{req_text}-mid2" text = f"{req_text}-back2"
next_request = RequestType(text=text).model_dump_json() next_request = RequestType(text=text).model_dump_json()
print(next_request) print(next_request)
...@@ -117,8 +117,17 @@ class Middle: ...@@ -117,8 +117,17 @@ class Middle:
req_text = req.text req_text = req.text
print(f"Middle received: {req_text}") print(f"Middle received: {req_text}")
text = f"{req_text}-mid" text = f"{req_text}-mid"
for token in text.split():
yield f"Mid: {token}" txt = RequestType(text=text)
if self.backend:
async for back_resp in self.backend.generate(txt.model_dump_json()):
print(f"Frontend received back_resp: {back_resp}")
yield f"Frontend: {back_resp}"
else:
async for back_resp in self.backend2.generate(txt.model_dump_json()):
print(f"Frontend received back_resp: {back_resp}")
yield f"Frontend: {back_resp}"
@service(resources={"cpu": "1"}, traffic={"timeout": 60}) @service(resources={"cpu": "1"}, traffic={"timeout": 60})
...@@ -136,11 +145,6 @@ class Frontend: ...@@ -136,11 +145,6 @@ class Frontend:
print(f"Frontend received type: {type(text)}") print(f"Frontend received type: {type(text)}")
txt = RequestType(text=text) txt = RequestType(text=text)
print(f"Frontend sending: {type(txt)}") print(f"Frontend sending: {type(txt)}")
if self.backend:
async for back_resp in self.backend.generate(txt.model_dump_json()):
print(f"Frontend received back_resp: {back_resp}")
yield f"Frontend: {back_resp}"
else:
async for mid_resp in self.middle.generate(txt.model_dump_json()): async for mid_resp in self.middle.generate(txt.model_dump_json()):
print(f"Frontend received mid_resp: {mid_resp}") print(f"Frontend received mid_resp: {mid_resp}")
yield f"Frontend: {mid_resp}" yield f"Frontend: {mid_resp}"
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import pytest
from dynamo.sdk.lib.config import ServiceConfig
# Mark every test in this module so CI runs it in the pre-merge suite.
pytestmark = pytest.mark.pre_merge
def test_service_config_with_common_configs():
    """A service subscribed to every `Common` key via `common-configs`
    must see each of those keys surface as a CLI flag."""
    # Drop any cached singleton so the env var below is re-read.
    ServiceConfig._instance = None

    # Config where VllmWorker subscribes to all three Common keys.
    config_json = """
    {
        "Common": {
            "model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
            "block-size": 64,
            "max-model-len": 16384
        },
        "VllmWorker": {
            "enforce-eager": true,
            "common-configs": ["model", "block-size", "max-model-len"]
        }
    }
    """
    os.environ["DYNAMO_SERVICE_CONFIG"] = config_json

    args = ServiceConfig.get_instance().as_args("VllmWorker")

    # Every subscribed common key must be present as a flag.
    missing = [k for k in ("model", "block-size", "max-model-len") if f"--{k}" not in args]
    assert not missing
def test_service_config_without_common_configs():
    """Without a `common-configs` subscription, nothing from the
    `Common` section may leak into a service's CLI args."""
    # Drop any cached singleton so the env var below is re-read.
    ServiceConfig._instance = None

    # VllmWorker deliberately omits the "common-configs" key.
    config_json = """
    {
        "Common": {
            "model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
            "block-size": 64,
            "max-model-len": 16384
        },
        "VllmWorker": {
            "enforce-eager": true
        }
    }
    """
    os.environ["DYNAMO_SERVICE_CONFIG"] = config_json

    args = ServiceConfig.get_instance().as_args("VllmWorker")

    # None of the common keys may appear as flags.
    leaked = [k for k in ("model", "block-size", "max-model-len") if f"--{k}" in args]
    assert not leaked
def test_service_config_with_direct_configs():
    """Keys written directly on a service (no `Common` section involved)
    must all be emitted as CLI flags."""
    # Drop any cached singleton so the env var below is re-read.
    ServiceConfig._instance = None

    # All values live directly on VllmWorker.
    config_json = """
    {
        "VllmWorker": {
            "enforce-eager": true,
            "model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
            "block-size": 64,
            "max-model-len": 16384
        }
    }
    """
    os.environ["DYNAMO_SERVICE_CONFIG"] = config_json

    args = ServiceConfig.get_instance().as_args("VllmWorker")

    # Each directly-set key must be present as a flag.
    for flag in ("--model", "--block-size", "--max-model-len"):
        assert flag in args
def test_service_config_override_common_configs():
    """A key set on both `Common` and the service itself must use the
    service's own value, while the remaining common keys still apply."""
    # Drop any cached singleton so the env var below is re-read.
    ServiceConfig._instance = None

    # VllmWorker subscribes to all common keys but overrides block-size.
    config_json = """
    {
        "Common": {
            "model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
            "block-size": 64,
            "max-model-len": 16384
        },
        "VllmWorker": {
            "enforce-eager": true,
            "block-size": 128,
            "common-configs": ["model", "block-size", "max-model-len"]
        }
    }
    """
    os.environ["DYNAMO_SERVICE_CONFIG"] = config_json

    args = ServiceConfig.get_instance().as_args("VllmWorker")

    # All three keys must be present...
    for flag in ("--model", "--block-size", "--max-model-len"):
        assert flag in args
    # ...and block-size must carry the service-local override, not Common's 64.
    block_size_value = args[args.index("--block-size") + 1]
    assert block_size_value == "128"
...@@ -79,4 +79,4 @@ async def test_pipeline(): ...@@ -79,4 +79,4 @@ async def test_pipeline():
if attempt == max_retries - 1: if attempt == max_retries - 1:
raise raise
print(f"Attempt {attempt + 1} failed, retrying...") print(f"Attempt {attempt + 1} failed, retrying...")
await asyncio.sleep(1) await asyncio.sleep(3)
...@@ -162,36 +162,36 @@ This will print out something like ...@@ -162,36 +162,36 @@ This will print out something like
```bash ```bash
Service Configuration: Service Configuration:
{ {
"Common": {
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
"block-size": 64,
"max-model-len": 16384,
},
"Frontend": { "Frontend": {
"served_model_name": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "served_model_name": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
"endpoint": "dynamo.Processor.chat/completions", "endpoint": "dynamo.Processor.chat/completions",
"port": 8000 "port": 8000
}, },
"Processor": { "Processor": {
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "router": "round-robin",
"block-size": 64, "common-configs": [model, block-size, max-model-len]
"max-model-len": 16384,
"router": "round-robin"
}, },
"VllmWorker": { "VllmWorker": {
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
"enforce-eager": true, "enforce-eager": true,
"block-size": 64,
"max-model-len": 16384,
"max-num-batched-tokens": 16384, "max-num-batched-tokens": 16384,
"enable-prefix-caching": true, "enable-prefix-caching": true,
"router": "random", "router": "random",
"tensor-parallel-size": 1, "tensor-parallel-size": 1,
"ServiceArgs": { "ServiceArgs": {
"workers": 1 "workers": 1
} },
"common-configs": [model, block-size, max-model-len]
} }
} }
Environment Variable that would be set: Environment Variable that would be set:
DYNAMO_SERVICE_CONFIG={"Frontend": {"served_model_name": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "endpoint": "dynamo.Processor.chat/completions", "port": 8000}, "Processor": {"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "block-size": 64, DYNAMO_SERVICE_CONFIG={"Common": {"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "block-size": 64, "max-model-len": 16384}, "Frontend": {"served_model_name": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "endpoint": "dynamo.Processor.chat/completions", "port": 8000}, "Processor": {"router": "round-robin", "common-configs": ["model", "block-size", "max-model-len"]}, "VllmWorker": {"enforce-eager": true, "max-num-batched-tokens": 16384, "enable-prefix-caching":
"max-model-len": 16384, "router": "round-robin"}, "VllmWorker": {"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "enforce-eager": true, "block-size": 64, "max-model-len": 16384, "max-num-batched-tokens": 16384, "enable-prefix-caching": true, "router": "random", "tensor-parallel-size": 1, "ServiceArgs": {"workers": 1}, "common-configs": ["model", "block-size", "max-model-len"]}}
true, "router": "random", "tensor-parallel-size": 1, "ServiceArgs": {"workers": 1}}}
``` ```
You can override any of these configuration options by passing in CLI flags to serve. For example, to change the routing strategy, you can run You can override any of these configuration options by passing in CLI flags to serve. For example, to change the routing strategy, you can run
......
...@@ -43,7 +43,7 @@ def parse_args(service_name, prefix) -> Namespace: ...@@ -43,7 +43,7 @@ def parse_args(service_name, prefix) -> Namespace:
help="Minimum number of workers required before proceeding", help="Minimum number of workers required before proceeding",
) )
parser.add_argument( parser.add_argument(
"--model-name", "--model",
type=str, type=str,
default="deepseek-ai/DeepSeek-R1-Distill-Llama-8B", default="deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
help="Model that is being served", help="Model that is being served",
......
...@@ -12,6 +12,10 @@ ...@@ -12,6 +12,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
Common:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
block-size: 64
max-model-len: 16384
Frontend: Frontend:
served_model_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B served_model_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
...@@ -19,16 +23,11 @@ Frontend: ...@@ -19,16 +23,11 @@ Frontend:
port: 8000 port: 8000
Processor: Processor:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
block-size: 64
max-model-len: 16384
router: round-robin router: round-robin
common-configs: [model, block-size, max-model-len]
VllmWorker: VllmWorker:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
enforce-eager: true enforce-eager: true
block-size: 64
max-model-len: 16384
max-num-batched-tokens: 16384 max-num-batched-tokens: 16384
enable-prefix-caching: true enable-prefix-caching: true
router: random router: random
...@@ -37,3 +36,4 @@ VllmWorker: ...@@ -37,3 +36,4 @@ VllmWorker:
workers: 1 workers: 1
resources: resources:
gpu: 1 gpu: 1
common-configs: [model, block-size, max-model-len]
...@@ -12,6 +12,12 @@ ...@@ -12,6 +12,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
Common:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
router: kv
block-size: 64
max-model-len: 16384
kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}'
Frontend: Frontend:
served_model_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B served_model_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
...@@ -19,26 +25,19 @@ Frontend: ...@@ -19,26 +25,19 @@ Frontend:
port: 8000 port: 8000
Processor: Processor:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B common-configs: [model, block-size, max-model-len, router]
block-size: 64
max-model-len: 16384
router: kv
Router: Router:
model-name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
min-workers: 1 min-workers: 1
common-configs: [model]
VllmWorker: VllmWorker:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
enforce-eager: true enforce-eager: true
kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}'
block-size: 64
max-model-len: 16384
max-num-batched-tokens: 16384 max-num-batched-tokens: 16384
enable-prefix-caching: true enable-prefix-caching: true
router: kv
tensor-parallel-size: 1 tensor-parallel-size: 1
ServiceArgs: ServiceArgs:
workers: 1 workers: 1
resources: resources:
gpu: 1 gpu: 1
common-configs: [model, block-size, max-model-len, router, kv-transfer-config]
...@@ -12,7 +12,11 @@ ...@@ -12,7 +12,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
Common:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
block-size: 64
max-model-len: 16384
kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}'
Frontend: Frontend:
served_model_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B served_model_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
...@@ -20,13 +24,10 @@ Frontend: ...@@ -20,13 +24,10 @@ Frontend:
port: 8000 port: 8000
Processor: Processor:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
router: round-robin router: round-robin
common-configs: [model, block-size]
VllmWorker: VllmWorker:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}'
max-model-len: 16384
remote-prefill: true remote-prefill: true
conditional-disagg: true conditional-disagg: true
max-local-prefill-length: 10 max-local-prefill-length: 10
...@@ -35,13 +36,12 @@ VllmWorker: ...@@ -35,13 +36,12 @@ VllmWorker:
workers: 1 workers: 1
resources: resources:
gpu: 1 gpu: 1
common-configs: [model, block-size, max-model-len, kv-transfer-config]
PrefillWorker: PrefillWorker:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}'
max-model-len: 16384
max-num-batched-tokens: 16384 max-num-batched-tokens: 16384
ServiceArgs: ServiceArgs:
workers: 1 workers: 1
resources: resources:
gpu: 1 gpu: 1
common-configs: [model, block-size, max-model-len, kv-transfer-config]
...@@ -12,6 +12,12 @@ ...@@ -12,6 +12,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
Common:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
block-size: 64
max-model-len: 16384
router: kv
kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}'
Frontend: Frontend:
served_model_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B served_model_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
...@@ -19,41 +25,30 @@ Frontend: ...@@ -19,41 +25,30 @@ Frontend:
port: 8000 port: 8000
Processor: Processor:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B common-configs: [model, block-size, max-model-len, router]
block-size: 64
max-model-len: 16384
router: kv
Router: Router:
model-name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
min-workers: 1 min-workers: 1
common-configs: [model]
VllmWorker: VllmWorker:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}'
block-size: 64
max-model-len: 16384
max-num-batched-tokens: 16384 max-num-batched-tokens: 16384
remote-prefill: true remote-prefill: true
conditional-disagg: true conditional-disagg: true
max-local-prefill-length: 10 max-local-prefill-length: 10
max-prefill-queue-size: 2 max-prefill-queue-size: 2
tensor-parallel-size: 1 tensor-parallel-size: 1
router: kv
enable-prefix-caching: true enable-prefix-caching: true
ServiceArgs: ServiceArgs:
workers: 1 workers: 1
resources: resources:
gpu: 1 gpu: 1
common-configs: [model, block-size, max-model-len, router, kv-transfer-config]
# TODO - set all of these but model as default
PrefillWorker: PrefillWorker:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}'
block-size: 64
max-model-len: 16384
max-num-batched-tokens: 16384 max-num-batched-tokens: 16384
ServiceArgs: ServiceArgs:
workers: 1 workers: 1
resources: resources:
gpu: 1 gpu: 1
common-configs: [model, block-size, max-model-len, kv-transfer-config]
...@@ -89,7 +89,7 @@ pub enum PromptContextMixin { ...@@ -89,7 +89,7 @@ pub enum PromptContextMixin {
/// Support OAI Chat Messages and Tools /// Support OAI Chat Messages and Tools
OaiChat, OaiChat,
/// Enables templates with `{{datatime}}` to be rendered with the current date and time. /// Enables templates with `{{datetime}}` to be rendered with the current date and time.
Llama3DateTime, Llama3DateTime,
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment