Unverified Commit a04b5631 authored by Hongkuan Zhou's avatar Hongkuan Zhou Committed by GitHub
Browse files

feat: support AIC DGD gen call (WILL BREAK DGDR) (#6216)


Signed-off-by: default avatarhongkuanz <hongkuanz@nvidia.com>
parent 7b16480a
...@@ -110,6 +110,7 @@ deploy: ...@@ -110,6 +110,7 @@ deploy:
planner: planner:
- 'components/src/dynamo/planner/**' - 'components/src/dynamo/planner/**'
- 'tests/planner/**' - 'tests/planner/**'
- 'components/src/dynamo/profiler/**'
- 'components/src/dynamo/global_router/**' - 'components/src/dynamo/global_router/**'
vllm: vllm:
......
...@@ -28,7 +28,7 @@ CODEOWNERS @ai-dynamo/Devops ...@@ -28,7 +28,7 @@ CODEOWNERS @ai-dynamo/Devops
/components/src/dynamo/planner/ @ai-dynamo/python-codeowners @ai-dynamo/Devops /components/src/dynamo/planner/ @ai-dynamo/python-codeowners @ai-dynamo/Devops
/components/src/dynamo/global_router/ @ai-dynamo/python-codeowners @ai-dynamo/Devops /components/src/dynamo/global_router/ @ai-dynamo/python-codeowners @ai-dynamo/Devops
/examples/hierarchical_planner/ @ai-dynamo/python-codeowners @ai-dynamo/Devops /examples/hierarchical_planner/ @ai-dynamo/python-codeowners @ai-dynamo/Devops
/benchmarks/profiler/ @ai-dynamo/python-codeowners @ai-dynamo/Devops /components/src/dynamo/profiler/ @ai-dynamo/python-codeowners @ai-dynamo/Devops
/tests/planner/ @ai-dynamo/python-codeowners @ai-dynamo/Devops /tests/planner/ @ai-dynamo/python-codeowners @ai-dynamo/Devops
# recipes # recipes
......
...@@ -142,7 +142,7 @@ Understanding Dynamo's architecture helps you find where to make changes. For th ...@@ -142,7 +142,7 @@ Understanding Dynamo's architecture helps you find where to make changes. For th
| **Planner** | Real-time performance tuning and worker scaling | `components/src/dynamo/planner/` | | **Planner** | Real-time performance tuning and worker scaling | `components/src/dynamo/planner/` |
| **Workers** | Backend engine integrations (vLLM, SGLang, TensorRT-LLM) | `components/src/dynamo/{vllm,sglang,trtllm}/` | | **Workers** | Backend engine integrations (vLLM, SGLang, TensorRT-LLM) | `components/src/dynamo/{vllm,sglang,trtllm}/` |
| **KV Cache Manager** | Multi-tier cache offloading (GPU → CPU → SSD → Object Storage) | `lib/bindings/kvbm/` | | **KV Cache Manager** | Multi-tier cache offloading (GPU → CPU → SSD → Object Storage) | `lib/bindings/kvbm/` |
| **SLA Profiler** | Benchmarking and SLA-driven configuration | `benchmarks/profiler/` | | **SLA Profiler** | Benchmarking and SLA-driven configuration | `components/src/dynamo/profiler/` |
### Communication Planes ### Communication Planes
......
...@@ -67,7 +67,7 @@ The profile results directory should contain `selected_prefill_interpolation/` a ...@@ -67,7 +67,7 @@ The profile results directory should contain `selected_prefill_interpolation/` a
To generate profiling data for your own model/hardware configuration, run the profiler (see [SLA-driven profiling documentation](../../../../docs/pages/components/profiler/profiler-guide.md) for details): To generate profiling data for your own model/hardware configuration, run the profiler (see [SLA-driven profiling documentation](../../../../docs/pages/components/profiler/profiler-guide.md) for details):
```bash ```bash
python benchmarks/profiler/profile_sla.py \ python components/src/dynamo/profiler/profile_sla.py \
--profile-config your_profile_config.yaml --profile-config your_profile_config.yaml
``` ```
......
...@@ -174,7 +174,7 @@ class KubernetesConnector(PlannerConnector): ...@@ -174,7 +174,7 @@ class KubernetesConnector(PlannerConnector):
self.graph_deployment_name self.graph_deployment_name
) )
# TODO: benchmarks/profiler/utils/config.py already contains DGD config parsing # TODO: dynamo/profiler/utils/config.py already contains DGD config parsing
# and model name logic, should consolidate # and model name logic, should consolidate
prefill_model_name = None prefill_model_name = None
decode_model_name = None decode_model_name = None
......
...@@ -6,8 +6,8 @@ SPDX-License-Identifier: Apache-2.0 ...@@ -6,8 +6,8 @@ SPDX-License-Identifier: Apache-2.0
# Profiler # Profiler
Documentation for the Dynamo Profiler has moved to [docs/pages/components/profiler/](../../docs/pages/components/profiler/README.md). Documentation for the Dynamo Profiler has moved to [docs/pages/components/profiler/](../../../../docs/pages/components/profiler/README.md).
- [Profiler Overview](../../docs/pages/components/profiler/README.md) - [Profiler Overview](../../../../docs/pages/components/profiler/README.md)
- [Profiler Guide](../../docs/pages/components/profiler/profiler-guide.md) - [Profiler Guide](../../../../docs/pages/components/profiler/profiler-guide.md)
- [Profiler Examples](../../docs/pages/components/profiler/profiler-examples.md) - [Profiler Examples](../../../../docs/pages/components/profiler/profiler-examples.md)
...@@ -5,9 +5,9 @@ import argparse ...@@ -5,9 +5,9 @@ import argparse
import logging import logging
import os import os
from benchmarks.profiler.utils.defaults import EngineType from dynamo.profiler.utils.defaults import EngineType
from benchmarks.profiler.utils.profile_decode import profile_decode from dynamo.profiler.utils.profile_decode import profile_decode
from benchmarks.profiler.utils.profile_prefill import profile_prefill from dynamo.profiler.utils.profile_prefill import profile_prefill
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
......
...@@ -22,49 +22,46 @@ from dataclasses import dataclass, field ...@@ -22,49 +22,46 @@ from dataclasses import dataclass, field
import numpy as np import numpy as np
import yaml import yaml
from benchmarks.profiler.utils.aiperf import ( from deploy.utils.dynamo_deployment import (
DynamoDeploymentClient,
cleanup_remaining_deployments,
)
from dynamo.planner.defaults import SubComponentType
from dynamo.profiler.utils.aiperf import (
get_decode_itl_and_thpt_per_gpu, get_decode_itl_and_thpt_per_gpu,
get_prefill_ttft, get_prefill_ttft,
) )
from benchmarks.profiler.utils.config import Config, get_service_name_by_type from dynamo.profiler.utils.config import Config, get_service_name_by_type
from benchmarks.profiler.utils.config_modifiers import CONFIG_MODIFIERS from dynamo.profiler.utils.config_modifiers import CONFIG_MODIFIERS
from benchmarks.profiler.utils.config_modifiers.parallelization_mapping import ( from dynamo.profiler.utils.config_modifiers.parallelization_mapping import (
ParallelizationMapping, ParallelizationMapping,
apply_parallel_mapping_to_config, apply_parallel_mapping_to_config,
get_candidate_parallel_mappings, get_candidate_parallel_mappings,
) )
from benchmarks.profiler.utils.defaults import EngineType from dynamo.profiler.utils.defaults import EngineType
from benchmarks.profiler.utils.dgd_generation import generate_dgd_config_with_planner from dynamo.profiler.utils.dgd_generation import generate_dgd_config_with_planner
from benchmarks.profiler.utils.estimate_perf import AIConfiguratorPerfEstimator from dynamo.profiler.utils.estimate_perf import AIConfiguratorPerfEstimator
from benchmarks.profiler.utils.plot import ( from dynamo.profiler.utils.plot import (
plot_decode_performance, plot_decode_performance,
plot_pd_joint_results, plot_pd_joint_results,
plot_prefill_performance, plot_prefill_performance,
) )
from benchmarks.profiler.utils.profile_decode import ( from dynamo.profiler.utils.profile_decode import (
get_num_request_range, get_num_request_range,
profile_decode, profile_decode,
profile_decode_aiconfigurator, profile_decode_aiconfigurator,
) )
from benchmarks.profiler.utils.profile_prefill import ( from dynamo.profiler.utils.profile_prefill import (
profile_prefill, profile_prefill,
profile_prefill_aiconfigurator, profile_prefill_aiconfigurator,
) )
from benchmarks.profiler.utils.profiler_argparse import create_profiler_parser from dynamo.profiler.utils.profiler_argparse import create_profiler_parser
from benchmarks.profiler.utils.profiler_status import ( from dynamo.profiler.utils.profiler_status import ProfilerStatus, write_profiler_status
ProfilerStatus, from dynamo.profiler.webui.select_config import (
write_profiler_status,
)
from benchmarks.profiler.webui.select_config import (
add_profiling_error, add_profiling_error,
clear_profiling_errors, clear_profiling_errors,
pick_config_with_webui, pick_config_with_webui,
) )
from deploy.utils.dynamo_deployment import (
DynamoDeploymentClient,
cleanup_remaining_deployments,
)
from dynamo.planner.defaults import SubComponentType
@dataclass @dataclass
......
...@@ -20,7 +20,7 @@ import random ...@@ -20,7 +20,7 @@ import random
import subprocess import subprocess
from typing import Optional, Tuple from typing import Optional, Tuple
from benchmarks.profiler.utils.defaults import ( from dynamo.profiler.utils.defaults import (
AIPERF_PREFILL_ATTN_DP_NUM_REQ_RATIO, AIPERF_PREFILL_ATTN_DP_NUM_REQ_RATIO,
AIPERF_PREFILL_BENCHMARK_OSL, AIPERF_PREFILL_BENCHMARK_OSL,
AIPERF_WARMUP_REQUEST_PER_DP_RANK, AIPERF_WARMUP_REQUEST_PER_DP_RANK,
......
...@@ -16,13 +16,13 @@ ...@@ -16,13 +16,13 @@
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
if TYPE_CHECKING: if TYPE_CHECKING:
from benchmarks.profiler.utils.config_modifiers.protocol import ( from dynamo.profiler.utils.config_modifiers.protocol import (
ConfigModifierProtocol, ConfigModifierProtocol,
) )
from benchmarks.profiler.utils.config_modifiers.sglang import SGLangConfigModifier from dynamo.profiler.utils.config_modifiers.sglang import SGLangConfigModifier
from benchmarks.profiler.utils.config_modifiers.trtllm import TrtllmConfigModifier from dynamo.profiler.utils.config_modifiers.trtllm import TrtllmConfigModifier
from benchmarks.profiler.utils.config_modifiers.vllm import VllmV1ConfigModifier from dynamo.profiler.utils.config_modifiers.vllm import VllmV1ConfigModifier
CONFIG_MODIFIERS: dict[str, type["ConfigModifierProtocol"]] = { CONFIG_MODIFIERS: dict[str, type["ConfigModifierProtocol"]] = {
"vllm": VllmV1ConfigModifier, "vllm": VllmV1ConfigModifier,
......
...@@ -6,12 +6,9 @@ import logging ...@@ -6,12 +6,9 @@ import logging
from dataclasses import dataclass from dataclasses import dataclass
from enum import Enum from enum import Enum
from benchmarks.profiler.utils.defaults import PREFILL_MAX_NUM_TOKENS
from benchmarks.profiler.utils.model_info import (
MOE_ADDITIONAL_TP_ARCHITECTURES,
ModelInfo,
)
from dynamo.planner.defaults import SubComponentType from dynamo.planner.defaults import SubComponentType
from dynamo.profiler.utils.defaults import PREFILL_MAX_NUM_TOKENS
from dynamo.profiler.utils.model_info import MOE_ADDITIONAL_TP_ARCHITECTURES, ModelInfo
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
......
...@@ -15,18 +15,23 @@ ...@@ -15,18 +15,23 @@
from __future__ import annotations from __future__ import annotations
import logging
from typing import Any, Protocol, Tuple from typing import Any, Protocol, Tuple
from benchmarks.profiler.utils.config import ( from dynamo.planner.defaults import SubComponentType
from dynamo.profiler.utils.config import (
Config, Config,
Container, Container,
PodSpec, PodSpec,
ServiceResources,
break_arguments, break_arguments,
get_service_name_by_type, get_service_name_by_type,
set_argument_value, set_argument_value,
update_image,
) )
from benchmarks.profiler.utils.defaults import EngineType from dynamo.profiler.utils.defaults import EngineType
from dynamo.planner.defaults import SubComponentType
logger = logging.getLogger(__name__)
class ConfigModifierProtocol(Protocol): class ConfigModifierProtocol(Protocol):
...@@ -93,7 +98,7 @@ class ConfigModifierProtocol(Protocol): ...@@ -93,7 +98,7 @@ class ConfigModifierProtocol(Protocol):
... ...
@classmethod @classmethod
def load_default_config(cls) -> dict: def load_default_config(cls, mode: str = "disagg") -> dict:
... ...
@classmethod @classmethod
...@@ -407,3 +412,197 @@ class BaseConfigModifier: ...@@ -407,3 +412,197 @@ class BaseConfigModifier:
) )
return cfg.model_dump() return cfg.model_dump()
@classmethod
def build_dgd_config(
cls,
mode: str,
model_name: str,
image: str,
# Disagg workers (used when mode=="disagg")
prefill_cli_args: list[str] | None = None,
prefill_replicas: int = 1,
prefill_gpus: int = 1,
decode_cli_args: list[str] | None = None,
decode_replicas: int = 1,
decode_gpus: int = 1,
# Agg worker (used when mode=="agg")
agg_cli_args: list[str] | None = None,
agg_replicas: int = 1,
agg_gpus: int = 1,
# Optional
namespace: str | None = None,
model_path: str | None = None,
pvc_name: str | None = None,
pvc_mount_path: str | None = None,
) -> dict:
"""
Build a complete DynamoGraphDeployment config by loading a base YAML
and injecting pre-computed CLI args, model, image, replicas, and GPU resources.
This is intended for use by external tools (e.g. AIConfigurator) that
have already computed the per-worker CLI arguments and just need them
placed into a valid DGD config structure.
Args:
mode: "agg" or "disagg"
model_name: Model name / HuggingFace ID (e.g. "Qwen/Qwen3-32B")
image: Container image for all services
prefill_cli_args: Pre-computed CLI args list for prefill worker
prefill_replicas: Number of prefill worker replicas
prefill_gpus: GPUs per prefill worker
decode_cli_args: Pre-computed CLI args list for decode worker
decode_replicas: Number of decode worker replicas
decode_gpus: GPUs per decode worker
agg_cli_args: Pre-computed CLI args list for agg worker
agg_replicas: Number of agg worker replicas
agg_gpus: GPUs per agg worker
namespace: K8s namespace (optional)
model_path: Model path if different from model_name (e.g. PVC path)
pvc_name: PVC claim name for model cache (optional)
pvc_mount_path: PVC mount path (optional)
Returns:
Complete DGD config dict ready for YAML serialization
Raises:
ValueError: If mode is not "agg" or "disagg"
"""
if mode not in ("agg", "disagg"):
raise ValueError(f"Invalid mode '{mode}': must be 'agg' or 'disagg'")
config = cls.load_default_config(mode=mode)
cfg = Config.model_validate(config)
# Set metadata
cfg.metadata.name = f"{cls.BACKEND}-{mode}"
if namespace and hasattr(cfg.metadata, "namespace"):
cfg.metadata.namespace = namespace
# Update image for all services
config = update_image(cfg.model_dump(), image)
cfg = Config.model_validate(config)
if mode == "disagg":
cls._apply_disagg_workers(
cfg,
prefill_cli_args=prefill_cli_args or [],
prefill_replicas=prefill_replicas,
prefill_gpus=prefill_gpus,
decode_cli_args=decode_cli_args or [],
decode_replicas=decode_replicas,
decode_gpus=decode_gpus,
)
else:
cls._apply_agg_worker(
cfg,
agg_cli_args=agg_cli_args or [],
agg_replicas=agg_replicas,
agg_gpus=agg_gpus,
)
# Update model (handles worker args + frontend patching)
effective_model_path = model_path or model_name
if pvc_name and pvc_mount_path:
result = cls.update_model_from_pvc(
cfg.model_dump(),
model_name=model_name,
pvc_name=pvc_name,
pvc_mount_path=pvc_mount_path,
pvc_path="",
)
else:
result = cls.update_model(
cfg.model_dump(),
model_name=model_name,
model_path=effective_model_path,
)
return result
_NON_WORKER_SERVICES = {"Frontend", "Planner"}
@classmethod
def _resolve_service_name(
cls,
cfg: Config,
component_type: SubComponentType,
) -> str | None:
"""Resolve the service name for a given component type, with fallback."""
try:
return get_service_name_by_type(cfg, cls.BACKEND, component_type)
except Exception:
# Fallback: find the first worker service (skip Frontend, Planner)
for name in cfg.spec.services:
if name not in cls._NON_WORKER_SERVICES:
return name
return None
@staticmethod
def _apply_worker_config(
service: Any,
cli_args: list[str],
replicas: int,
gpus: int,
) -> None:
"""Apply CLI args, replicas, and GPU resources to a single worker service."""
service.replicas = replicas
if service.resources is None:
service.resources = ServiceResources()
if service.resources.limits is None:
service.resources.limits = {}
service.resources.limits["gpu"] = str(gpus)
if service.extraPodSpec and service.extraPodSpec.mainContainer:
service.extraPodSpec.mainContainer.args = list(cli_args)
@classmethod
def _apply_disagg_workers(
cls,
cfg: Config,
prefill_cli_args: list[str],
prefill_replicas: int,
prefill_gpus: int,
decode_cli_args: list[str],
decode_replicas: int,
decode_gpus: int,
) -> None:
"""Apply CLI args, replicas, and GPU resources to disagg worker services."""
for sct, cli_args, replicas, gpus in [
(
SubComponentType.PREFILL,
prefill_cli_args,
prefill_replicas,
prefill_gpus,
),
(SubComponentType.DECODE, decode_cli_args, decode_replicas, decode_gpus),
]:
svc_name = cls._resolve_service_name(cfg, sct)
if svc_name is None or svc_name not in cfg.spec.services:
logger.warning(
"Could not find %s service for backend %s, skipping",
sct.value,
cls.BACKEND,
)
continue
cls._apply_worker_config(
cfg.spec.services[svc_name], cli_args, replicas, gpus
)
@classmethod
def _apply_agg_worker(
cls,
cfg: Config,
agg_cli_args: list[str],
agg_replicas: int,
agg_gpus: int,
) -> None:
"""Apply CLI args, replicas, and GPU resources to the agg worker service."""
svc_name = cls._resolve_service_name(cfg, SubComponentType.DECODE)
if svc_name is None or svc_name not in cfg.spec.services:
logger.warning("Could not find worker service for agg mode")
return
cls._apply_worker_config(
cfg.spec.services[svc_name], agg_cli_args, agg_replicas, agg_gpus
)
...@@ -7,7 +7,8 @@ from typing import Tuple ...@@ -7,7 +7,8 @@ from typing import Tuple
import yaml import yaml
from benchmarks.profiler.utils.config import ( from dynamo.planner.defaults import SubComponentType
from dynamo.profiler.utils.config import (
Config, Config,
append_argument, append_argument,
break_arguments, break_arguments,
...@@ -19,9 +20,12 @@ from benchmarks.profiler.utils.config import ( ...@@ -19,9 +20,12 @@ from benchmarks.profiler.utils.config import (
update_image, update_image,
validate_and_get_worker_args, validate_and_get_worker_args,
) )
from benchmarks.profiler.utils.config_modifiers.protocol import BaseConfigModifier from dynamo.profiler.utils.config_modifiers.protocol import BaseConfigModifier
from benchmarks.profiler.utils.defaults import DYNAMO_RUN_DEFAULT_PORT, EngineType from dynamo.profiler.utils.defaults import (
from dynamo.planner.defaults import SubComponentType DYNAMO_RUN_DEFAULT_PORT,
EngineType,
resolve_deploy_path,
)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
...@@ -33,16 +37,25 @@ formatter = logging.Formatter( ...@@ -33,16 +37,25 @@ formatter = logging.Formatter(
console_handler.setFormatter(formatter) console_handler.setFormatter(formatter)
logger.addHandler(console_handler) logger.addHandler(console_handler)
DEFAULT_SGLANG_DISAGG_CONFIG_PATH = resolve_deploy_path(
DEFAULT_SGLANG_CONFIG_PATH = "examples/backends/sglang/deploy/disagg.yaml" "examples/backends/sglang/deploy/disagg.yaml"
)
DEFAULT_SGLANG_AGG_CONFIG_PATH = resolve_deploy_path(
"examples/backends/sglang/deploy/agg.yaml"
)
class SGLangConfigModifier(BaseConfigModifier): class SGLangConfigModifier(BaseConfigModifier):
BACKEND = "sglang" BACKEND = "sglang"
@classmethod @classmethod
def load_default_config(cls) -> dict: def load_default_config(cls, mode: str = "disagg") -> dict:
with open(DEFAULT_SGLANG_CONFIG_PATH, "r") as f: path = (
DEFAULT_SGLANG_AGG_CONFIG_PATH
if mode == "agg"
else DEFAULT_SGLANG_DISAGG_CONFIG_PATH
)
with open(path, "r") as f:
return yaml.safe_load(f) return yaml.safe_load(f)
@classmethod @classmethod
......
...@@ -8,7 +8,8 @@ from typing import Tuple ...@@ -8,7 +8,8 @@ from typing import Tuple
import yaml import yaml
from benchmarks.profiler.utils.config import ( from dynamo.planner.defaults import SubComponentType
from dynamo.profiler.utils.config import (
Config, Config,
append_argument, append_argument,
break_arguments, break_arguments,
...@@ -20,9 +21,12 @@ from benchmarks.profiler.utils.config import ( ...@@ -20,9 +21,12 @@ from benchmarks.profiler.utils.config import (
update_image, update_image,
validate_and_get_worker_args, validate_and_get_worker_args,
) )
from benchmarks.profiler.utils.config_modifiers.protocol import BaseConfigModifier from dynamo.profiler.utils.config_modifiers.protocol import BaseConfigModifier
from benchmarks.profiler.utils.defaults import DYNAMO_RUN_DEFAULT_PORT, EngineType from dynamo.profiler.utils.defaults import (
from dynamo.planner.defaults import SubComponentType DYNAMO_RUN_DEFAULT_PORT,
EngineType,
resolve_deploy_path,
)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
...@@ -34,16 +38,25 @@ formatter = logging.Formatter( ...@@ -34,16 +38,25 @@ formatter = logging.Formatter(
console_handler.setFormatter(formatter) console_handler.setFormatter(formatter)
logger.addHandler(console_handler) logger.addHandler(console_handler)
DEFAULT_TRTLLM_DISAGG_CONFIG_PATH = resolve_deploy_path(
DEFAULT_TRTLLM_CONFIG_PATH = "examples/backends/trtllm/deploy/disagg.yaml" "examples/backends/trtllm/deploy/disagg.yaml"
)
DEFAULT_TRTLLM_AGG_CONFIG_PATH = resolve_deploy_path(
"examples/backends/trtllm/deploy/agg.yaml"
)
class TrtllmConfigModifier(BaseConfigModifier): class TrtllmConfigModifier(BaseConfigModifier):
BACKEND = "trtllm" BACKEND = "trtllm"
@classmethod @classmethod
def load_default_config(cls) -> dict: def load_default_config(cls, mode: str = "disagg") -> dict:
with open(DEFAULT_TRTLLM_CONFIG_PATH, "r") as f: path = (
DEFAULT_TRTLLM_AGG_CONFIG_PATH
if mode == "agg"
else DEFAULT_TRTLLM_DISAGG_CONFIG_PATH
)
with open(path, "r") as f:
return yaml.safe_load(f) return yaml.safe_load(f)
@classmethod @classmethod
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment