Unverified commit a04b5631, authored by Hongkuan Zhou, committed by GitHub
Browse files

feat: support AIC DGD gen call (WILL BREAK DGDR) (#6216)


Signed-off-by: hongkuanz <hongkuanz@nvidia.com>
parent 7b16480a
......@@ -6,7 +6,8 @@ from typing import Tuple
import yaml
from benchmarks.profiler.utils.config import (
from dynamo.planner.defaults import SubComponentType
from dynamo.profiler.utils.config import (
Config,
append_argument,
break_arguments,
......@@ -18,9 +19,12 @@ from benchmarks.profiler.utils.config import (
update_image,
validate_and_get_worker_args,
)
from benchmarks.profiler.utils.config_modifiers.protocol import BaseConfigModifier
from benchmarks.profiler.utils.defaults import DYNAMO_RUN_DEFAULT_PORT, EngineType
from dynamo.planner.defaults import SubComponentType
from dynamo.profiler.utils.config_modifiers.protocol import BaseConfigModifier
from dynamo.profiler.utils.defaults import (
DYNAMO_RUN_DEFAULT_PORT,
EngineType,
resolve_deploy_path,
)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
......@@ -32,8 +36,12 @@ formatter = logging.Formatter(
console_handler.setFormatter(formatter)
logger.addHandler(console_handler)
DEFAULT_VLLM_CONFIG_PATH = "examples/backends/vllm/deploy/disagg.yaml"
DEFAULT_VLLM_DISAGG_CONFIG_PATH = resolve_deploy_path(
"examples/backends/vllm/deploy/disagg.yaml"
)
DEFAULT_VLLM_AGG_CONFIG_PATH = resolve_deploy_path(
"examples/backends/vllm/deploy/agg.yaml"
)
class VllmV1ConfigModifier(BaseConfigModifier):
......@@ -42,8 +50,13 @@ class VllmV1ConfigModifier(BaseConfigModifier):
WORKER_MODEL_PATH_ARG = "--model"
@classmethod
def load_default_config(cls) -> dict:
with open(DEFAULT_VLLM_CONFIG_PATH, "r") as f:
def load_default_config(cls, mode: str = "disagg") -> dict:
path = (
DEFAULT_VLLM_AGG_CONFIG_PATH
if mode == "agg"
else DEFAULT_VLLM_DISAGG_CONFIG_PATH
)
with open(path, "r") as f:
return yaml.safe_load(f)
@classmethod
......
......@@ -13,8 +13,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from enum import Enum
def resolve_deploy_path(rel_path: str) -> str:
    """Build the path of a deploy YAML located under the dynamo workspace root.

    Args:
        rel_path: Location of the file relative to the workspace root,
            e.g. ``"examples/backends/vllm/deploy/agg.yaml"``.

    Returns:
        ``rel_path`` joined onto the directory returned by
        ``get_workspace_dir()`` via ``os.path.join``.

    Note:
        The workspace lookup is delegated entirely to ``get_workspace_dir()``;
        per the original author's note it is expected to cover the repo-root,
        container, and environment-variable cases (not verifiable from this
        block alone).
    """
    # Imported at call time rather than at module import time, as in the
    # original implementation.
    from dynamo.common.utils.paths import get_workspace_dir

    workspace_root = get_workspace_dir()
    return os.path.join(workspace_root, rel_path)
DYNAMO_RUN_DEFAULT_PORT = 8000
# set a decode maximum concurrency due to limits of profiling tools
......
......@@ -21,19 +21,19 @@ from typing import Any, Optional
import numpy as np
import yaml
from benchmarks.profiler.utils.config import (
from dynamo.common.utils.paths import get_workspace_dir
from dynamo.planner.defaults import MockerComponentName, SubComponentType
from dynamo.profiler.utils.config import (
Config,
DgdPlannerServiceConfig,
set_argument_value,
)
from benchmarks.profiler.utils.config_modifiers import CONFIG_MODIFIERS
from benchmarks.profiler.utils.config_modifiers.parallelization_mapping import (
from dynamo.profiler.utils.config_modifiers import CONFIG_MODIFIERS
from dynamo.profiler.utils.config_modifiers.parallelization_mapping import (
ParallelizationMapping,
apply_parallel_mapping_to_config,
)
from benchmarks.profiler.utils.planner_utils import build_planner_args_from_namespace
from dynamo.common.utils.paths import get_workspace_dir
from dynamo.planner.defaults import MockerComponentName, SubComponentType
from dynamo.profiler.utils.planner_utils import build_planner_args_from_namespace
# Path to mocker disagg config relative to workspace
MOCKER_DISAGG_CONFIG_PATH = "examples/backends/mocker/deploy/disagg.yaml"
......
......@@ -21,8 +21,8 @@ import numpy as np
from matplotlib import cm
from scipy.interpolate import griddata
from benchmarks.profiler.utils.defaults import DEFAULT_GPU_COST_PER_HOUR
from benchmarks.profiler.utils.pareto import compute_pareto
from dynamo.profiler.utils.defaults import DEFAULT_GPU_COST_PER_HOUR
from dynamo.profiler.utils.pareto import compute_pareto
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
......
......@@ -6,10 +6,10 @@ from typing import Callable, Optional, Tuple
import numpy as np
from benchmarks.profiler.utils.aiperf import get_decode_itl_and_thpt_per_gpu
from benchmarks.profiler.utils.defaults import DECODE_MAX_CONCURRENCY
from benchmarks.profiler.utils.estimate_perf import AIConfiguratorPerfEstimator
from benchmarks.profiler.utils.plot import plot_decode_3d_surface
from dynamo.profiler.utils.aiperf import get_decode_itl_and_thpt_per_gpu
from dynamo.profiler.utils.defaults import DECODE_MAX_CONCURRENCY
from dynamo.profiler.utils.estimate_perf import AIConfiguratorPerfEstimator
from dynamo.profiler.utils.plot import plot_decode_3d_surface
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
......
......@@ -6,9 +6,9 @@ from typing import Callable, Optional
import numpy as np
from benchmarks.profiler.utils.aiperf import get_prefill_ttft
from benchmarks.profiler.utils.estimate_perf import AIConfiguratorPerfEstimator
from benchmarks.profiler.utils.plot import plot_prefill_interpolation
from dynamo.profiler.utils.aiperf import get_prefill_ttft
from dynamo.profiler.utils.estimate_perf import AIConfiguratorPerfEstimator
from dynamo.profiler.utils.plot import plot_prefill_interpolation
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
......
......@@ -8,8 +8,8 @@ from typing import Any, Dict
import yaml
from benchmarks.profiler.utils.planner_utils import add_planner_arguments_to_parser
from benchmarks.profiler.utils.search_space_autogen import auto_generate_search_space
from dynamo.profiler.utils.planner_utils import add_planner_arguments_to_parser
from dynamo.profiler.utils.search_space_autogen import auto_generate_search_space
def _get(cfg: Dict[str, Any], camel: str, snake: str, default: Any = None) -> Any:
......
......@@ -8,9 +8,9 @@ import os
import yaml
from benchmarks.profiler.utils.config_modifiers import CONFIG_MODIFIERS
from benchmarks.profiler.utils.model_info import ModelInfo, get_model_info
from deploy.utils.gpu_inventory import get_gpu_summary
from dynamo.profiler.utils.config_modifiers import CONFIG_MODIFIERS
from dynamo.profiler.utils.model_info import ModelInfo, get_model_info
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
......
......@@ -5,7 +5,7 @@ import json
import logging
import queue
from benchmarks.profiler.webui.utils import (
from dynamo.profiler.webui.utils import (
add_profiling_error,
clear_profiling_errors,
create_gradio_interface,
......
......@@ -20,12 +20,12 @@ from aiconfigurator.webapp.components.profiling import (
load_profiling_javascript,
)
from benchmarks.profiler.utils.dgd_generation import (
from dynamo.profiler.utils.dgd_generation import (
generate_decode_service_config_preview,
generate_prefill_decode_services_config_preview,
generate_prefill_service_config_preview,
)
from benchmarks.profiler.utils.pareto import compute_pareto
from dynamo.profiler.utils.pareto import compute_pareto
logger = logging.getLogger(__name__)
......
......@@ -49,7 +49,7 @@ type ConfigMapKeySelector struct {
// ProfilingConfigSpec defines configuration for the profiling process.
// This structure maps directly to the profile_sla.py config format.
// See benchmarks/profiler/utils/profiler_argparse.py for the complete schema.
// See dynamo/profiler/utils/profiler_argparse.py for the complete schema.
type ProfilingConfigSpec struct {
// Config is the profiling configuration as arbitrary JSON/YAML. This will be passed directly to the profiler.
// The profiler will validate the configuration and report any errors.
......
......@@ -28,7 +28,7 @@ spec:
profilerImage: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.6.1"
# ProfilingConfig maps directly to the profile_sla.py config format
# See benchmarks/profiler/utils/profiler_argparse.py for complete schema
# See dynamo/profiler/utils/profiler_argparse.py for complete schema
# Note: deployment.model and engine.backend are automatically set from model and backend above
profilingConfig:
config:
......
......@@ -1082,7 +1082,7 @@ func (r *DynamoGraphDeploymentRequestReconciler) createProfilingJob(ctx context.
profilerContainer := corev1.Container{
Name: ContainerNameProfiler,
Image: imageName,
Command: []string{"python", "-m", "benchmarks.profiler.profile_sla"},
Command: []string{"python", "-m", "dynamo.profiler.profile_sla"},
Args: profilerArgs,
Env: profilerEnv,
VolumeMounts: volumeMounts,
......
......@@ -594,6 +594,6 @@ async def main():
# run with:
# uv run benchmarks/profiler/utils/dynamo_deployment.py -n mo-dyn -f ./examples/vllm/deploy/agg.yaml -l ./client_logs
# uv run components/src/dynamo/profiler/utils/dynamo_deployment.py -n mo-dyn -f ./examples/vllm/deploy/agg.yaml -l ./client_logs
if __name__ == "__main__":
asyncio.run(main())
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment