Unverified commit a04b5631, authored by Hongkuan Zhou and committed by GitHub
Browse files

feat: support AIC DGD gen call (WILL BREAK DGDR) (#6216)


Signed-off-by: hongkuanz <hongkuanz@nvidia.com>
parent 7b16480a
...@@ -6,7 +6,8 @@ from typing import Tuple ...@@ -6,7 +6,8 @@ from typing import Tuple
import yaml import yaml
from benchmarks.profiler.utils.config import ( from dynamo.planner.defaults import SubComponentType
from dynamo.profiler.utils.config import (
Config, Config,
append_argument, append_argument,
break_arguments, break_arguments,
...@@ -18,9 +19,12 @@ from benchmarks.profiler.utils.config import ( ...@@ -18,9 +19,12 @@ from benchmarks.profiler.utils.config import (
update_image, update_image,
validate_and_get_worker_args, validate_and_get_worker_args,
) )
from benchmarks.profiler.utils.config_modifiers.protocol import BaseConfigModifier from dynamo.profiler.utils.config_modifiers.protocol import BaseConfigModifier
from benchmarks.profiler.utils.defaults import DYNAMO_RUN_DEFAULT_PORT, EngineType from dynamo.profiler.utils.defaults import (
from dynamo.planner.defaults import SubComponentType DYNAMO_RUN_DEFAULT_PORT,
EngineType,
resolve_deploy_path,
)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
...@@ -32,8 +36,12 @@ formatter = logging.Formatter( ...@@ -32,8 +36,12 @@ formatter = logging.Formatter(
console_handler.setFormatter(formatter) console_handler.setFormatter(formatter)
logger.addHandler(console_handler) logger.addHandler(console_handler)
DEFAULT_VLLM_DISAGG_CONFIG_PATH = resolve_deploy_path(
DEFAULT_VLLM_CONFIG_PATH = "examples/backends/vllm/deploy/disagg.yaml" "examples/backends/vllm/deploy/disagg.yaml"
)
DEFAULT_VLLM_AGG_CONFIG_PATH = resolve_deploy_path(
"examples/backends/vllm/deploy/agg.yaml"
)
class VllmV1ConfigModifier(BaseConfigModifier): class VllmV1ConfigModifier(BaseConfigModifier):
...@@ -42,8 +50,13 @@ class VllmV1ConfigModifier(BaseConfigModifier): ...@@ -42,8 +50,13 @@ class VllmV1ConfigModifier(BaseConfigModifier):
WORKER_MODEL_PATH_ARG = "--model" WORKER_MODEL_PATH_ARG = "--model"
@classmethod @classmethod
def load_default_config(cls) -> dict: def load_default_config(cls, mode: str = "disagg") -> dict:
with open(DEFAULT_VLLM_CONFIG_PATH, "r") as f: path = (
DEFAULT_VLLM_AGG_CONFIG_PATH
if mode == "agg"
else DEFAULT_VLLM_DISAGG_CONFIG_PATH
)
with open(path, "r") as f:
return yaml.safe_load(f) return yaml.safe_load(f)
@classmethod @classmethod
......
...@@ -13,8 +13,20 @@ ...@@ -13,8 +13,20 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import os
from enum import Enum from enum import Enum
def resolve_deploy_path(rel_path: str) -> str:
    """Return the location of a deploy YAML file under the dynamo workspace root.

    The workspace root is obtained from ``get_workspace_dir()`` (which, per the
    original note, handles the repo root, container, and env var cases) and
    ``rel_path`` is joined onto it.

    Args:
        rel_path: Path of the deploy YAML relative to the workspace root.

    Returns:
        The workspace root joined with ``rel_path``.
    """
    # Imported at call time, as in the original, rather than at module load.
    from dynamo.common.utils.paths import get_workspace_dir

    workspace_root = get_workspace_dir()
    return os.path.join(workspace_root, rel_path)
DYNAMO_RUN_DEFAULT_PORT = 8000 DYNAMO_RUN_DEFAULT_PORT = 8000
# set a decode maximum concurrency due to limits of profiling tools # set a decode maximum concurrency due to limits of profiling tools
......
...@@ -21,19 +21,19 @@ from typing import Any, Optional ...@@ -21,19 +21,19 @@ from typing import Any, Optional
import numpy as np import numpy as np
import yaml import yaml
from benchmarks.profiler.utils.config import ( from dynamo.common.utils.paths import get_workspace_dir
from dynamo.planner.defaults import MockerComponentName, SubComponentType
from dynamo.profiler.utils.config import (
Config, Config,
DgdPlannerServiceConfig, DgdPlannerServiceConfig,
set_argument_value, set_argument_value,
) )
from benchmarks.profiler.utils.config_modifiers import CONFIG_MODIFIERS from dynamo.profiler.utils.config_modifiers import CONFIG_MODIFIERS
from benchmarks.profiler.utils.config_modifiers.parallelization_mapping import ( from dynamo.profiler.utils.config_modifiers.parallelization_mapping import (
ParallelizationMapping, ParallelizationMapping,
apply_parallel_mapping_to_config, apply_parallel_mapping_to_config,
) )
from benchmarks.profiler.utils.planner_utils import build_planner_args_from_namespace from dynamo.profiler.utils.planner_utils import build_planner_args_from_namespace
from dynamo.common.utils.paths import get_workspace_dir
from dynamo.planner.defaults import MockerComponentName, SubComponentType
# Path to mocker disagg config relative to workspace # Path to mocker disagg config relative to workspace
MOCKER_DISAGG_CONFIG_PATH = "examples/backends/mocker/deploy/disagg.yaml" MOCKER_DISAGG_CONFIG_PATH = "examples/backends/mocker/deploy/disagg.yaml"
......
...@@ -21,8 +21,8 @@ import numpy as np ...@@ -21,8 +21,8 @@ import numpy as np
from matplotlib import cm from matplotlib import cm
from scipy.interpolate import griddata from scipy.interpolate import griddata
from benchmarks.profiler.utils.defaults import DEFAULT_GPU_COST_PER_HOUR from dynamo.profiler.utils.defaults import DEFAULT_GPU_COST_PER_HOUR
from benchmarks.profiler.utils.pareto import compute_pareto from dynamo.profiler.utils.pareto import compute_pareto
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
......
...@@ -6,10 +6,10 @@ from typing import Callable, Optional, Tuple ...@@ -6,10 +6,10 @@ from typing import Callable, Optional, Tuple
import numpy as np import numpy as np
from benchmarks.profiler.utils.aiperf import get_decode_itl_and_thpt_per_gpu from dynamo.profiler.utils.aiperf import get_decode_itl_and_thpt_per_gpu
from benchmarks.profiler.utils.defaults import DECODE_MAX_CONCURRENCY from dynamo.profiler.utils.defaults import DECODE_MAX_CONCURRENCY
from benchmarks.profiler.utils.estimate_perf import AIConfiguratorPerfEstimator from dynamo.profiler.utils.estimate_perf import AIConfiguratorPerfEstimator
from benchmarks.profiler.utils.plot import plot_decode_3d_surface from dynamo.profiler.utils.plot import plot_decode_3d_surface
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
......
...@@ -6,9 +6,9 @@ from typing import Callable, Optional ...@@ -6,9 +6,9 @@ from typing import Callable, Optional
import numpy as np import numpy as np
from benchmarks.profiler.utils.aiperf import get_prefill_ttft from dynamo.profiler.utils.aiperf import get_prefill_ttft
from benchmarks.profiler.utils.estimate_perf import AIConfiguratorPerfEstimator from dynamo.profiler.utils.estimate_perf import AIConfiguratorPerfEstimator
from benchmarks.profiler.utils.plot import plot_prefill_interpolation from dynamo.profiler.utils.plot import plot_prefill_interpolation
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
......
...@@ -8,8 +8,8 @@ from typing import Any, Dict ...@@ -8,8 +8,8 @@ from typing import Any, Dict
import yaml import yaml
from benchmarks.profiler.utils.planner_utils import add_planner_arguments_to_parser from dynamo.profiler.utils.planner_utils import add_planner_arguments_to_parser
from benchmarks.profiler.utils.search_space_autogen import auto_generate_search_space from dynamo.profiler.utils.search_space_autogen import auto_generate_search_space
def _get(cfg: Dict[str, Any], camel: str, snake: str, default: Any = None) -> Any: def _get(cfg: Dict[str, Any], camel: str, snake: str, default: Any = None) -> Any:
......
...@@ -8,9 +8,9 @@ import os ...@@ -8,9 +8,9 @@ import os
import yaml import yaml
from benchmarks.profiler.utils.config_modifiers import CONFIG_MODIFIERS
from benchmarks.profiler.utils.model_info import ModelInfo, get_model_info
from deploy.utils.gpu_inventory import get_gpu_summary from deploy.utils.gpu_inventory import get_gpu_summary
from dynamo.profiler.utils.config_modifiers import CONFIG_MODIFIERS
from dynamo.profiler.utils.model_info import ModelInfo, get_model_info
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
......
...@@ -5,7 +5,7 @@ import json ...@@ -5,7 +5,7 @@ import json
import logging import logging
import queue import queue
from benchmarks.profiler.webui.utils import ( from dynamo.profiler.webui.utils import (
add_profiling_error, add_profiling_error,
clear_profiling_errors, clear_profiling_errors,
create_gradio_interface, create_gradio_interface,
......
...@@ -20,12 +20,12 @@ from aiconfigurator.webapp.components.profiling import ( ...@@ -20,12 +20,12 @@ from aiconfigurator.webapp.components.profiling import (
load_profiling_javascript, load_profiling_javascript,
) )
from benchmarks.profiler.utils.dgd_generation import ( from dynamo.profiler.utils.dgd_generation import (
generate_decode_service_config_preview, generate_decode_service_config_preview,
generate_prefill_decode_services_config_preview, generate_prefill_decode_services_config_preview,
generate_prefill_service_config_preview, generate_prefill_service_config_preview,
) )
from benchmarks.profiler.utils.pareto import compute_pareto from dynamo.profiler.utils.pareto import compute_pareto
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
......
...@@ -49,7 +49,7 @@ type ConfigMapKeySelector struct { ...@@ -49,7 +49,7 @@ type ConfigMapKeySelector struct {
// ProfilingConfigSpec defines configuration for the profiling process. // ProfilingConfigSpec defines configuration for the profiling process.
// This structure maps directly to the profile_sla.py config format. // This structure maps directly to the profile_sla.py config format.
// See benchmarks/profiler/utils/profiler_argparse.py for the complete schema. // See dynamo/profiler/utils/profiler_argparse.py for the complete schema.
type ProfilingConfigSpec struct { type ProfilingConfigSpec struct {
// Config is the profiling configuration as arbitrary JSON/YAML. This will be passed directly to the profiler. // Config is the profiling configuration as arbitrary JSON/YAML. This will be passed directly to the profiler.
// The profiler will validate the configuration and report any errors. // The profiler will validate the configuration and report any errors.
......
...@@ -28,7 +28,7 @@ spec: ...@@ -28,7 +28,7 @@ spec:
profilerImage: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.6.1" profilerImage: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.6.1"
# ProfilingConfig maps directly to the profile_sla.py config format # ProfilingConfig maps directly to the profile_sla.py config format
# See benchmarks/profiler/utils/profiler_argparse.py for complete schema # See dynamo/profiler/utils/profiler_argparse.py for complete schema
# Note: deployment.model and engine.backend are automatically set from model and backend above # Note: deployment.model and engine.backend are automatically set from model and backend above
profilingConfig: profilingConfig:
config: config:
......
...@@ -1082,7 +1082,7 @@ func (r *DynamoGraphDeploymentRequestReconciler) createProfilingJob(ctx context. ...@@ -1082,7 +1082,7 @@ func (r *DynamoGraphDeploymentRequestReconciler) createProfilingJob(ctx context.
profilerContainer := corev1.Container{ profilerContainer := corev1.Container{
Name: ContainerNameProfiler, Name: ContainerNameProfiler,
Image: imageName, Image: imageName,
Command: []string{"python", "-m", "benchmarks.profiler.profile_sla"}, Command: []string{"python", "-m", "dynamo.profiler.profile_sla"},
Args: profilerArgs, Args: profilerArgs,
Env: profilerEnv, Env: profilerEnv,
VolumeMounts: volumeMounts, VolumeMounts: volumeMounts,
......
...@@ -594,6 +594,6 @@ async def main(): ...@@ -594,6 +594,6 @@ async def main():
# run with: # run with:
# uv run benchmarks/profiler/utils/dynamo_deployment.py -n mo-dyn -f ./examples/vllm/deploy/agg.yaml -l ./client_logs # uv run components/src/dynamo/profiler/utils/dynamo_deployment.py -n mo-dyn -f ./examples/vllm/deploy/agg.yaml -l ./client_logs
if __name__ == "__main__": if __name__ == "__main__":
asyncio.run(main()) asyncio.run(main())
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment