Unverified commit 0c6a8024, authored by hhzhang16 and committed by GitHub
Browse files

fix: resolve 'auto' backend to concrete value in every situation (#7158)


Signed-off-by: Hannah Zhang <hannahz@nvidia.com>
parent 3cebc864
......@@ -99,7 +99,7 @@ def _generate_dgd_from_pick(
return None
# in naive mode, use vllm as the default backend
# Fallback backend when AIC simulation is unavailable and no concrete backend is specified.
_DEFAULT_NAIVE_BACKEND = "vllm"
......@@ -110,19 +110,10 @@ def _run_naive_fallback(
system: str,
backend: str,
) -> dict:
"""Handle the AIC-unsupported path via naive config generation.
Builds naive generator params (CLI args, parallelism) and then
assembles the DGD via ``build_dgd_config`` — the same route used
by the normal simulation path — so the output always uses the
clean base DGD YAMLs with actual ``command``/``args`` arrays.
"""
"""Handle the AIC-unsupported path via naive config generation."""
if backend == "auto":
backend = _DEFAULT_NAIVE_BACKEND
logger.info(
"Auto backend resolved to '%s' for naive fallback.",
backend,
)
logger.info("Auto backend resolved to '%s' for naive fallback.", backend)
logger.info(
"AIC does not support this combo — falling back to naive config generation."
)
......@@ -169,6 +160,13 @@ def _run_autoscale_sim(
request_latency: float | None,
) -> dict:
"""Build a TaskConfig, run autoscale simulation, collect latencies, generate DGD."""
# TODO(AIC): the autoscale path constructs TaskConfig directly; BackendName("auto")
# is not a valid enum value, so resolve "auto" to a concrete backend here.
# AIC should add native auto-backend support in the autoscale path.
if backend == "auto":
backend = _DEFAULT_NAIVE_BACKEND
logger.info("Auto backend resolved to '%s' for autoscale simulation.", backend)
planner_cfg = dgdr.features.planner if dgdr.features else None
if planner_cfg and planner_cfg.enable_throughput_scaling:
logger.warning(
......@@ -205,6 +203,7 @@ def _run_autoscale_sim(
"dgd_config": dgd_config,
"chosen_exp": "disagg",
"task_configs": task_configs,
"resolved_backend": backend,
}
......@@ -276,12 +275,24 @@ def _run_default_sim(
dgd_config = _generate_dgd_from_pick(dgdr, best_config_df, chosen, task_configs)
# When backend="auto" AIC expands to per-backend task configs; the winning
# row carries the concrete backend name so downstream consumers (e.g.
# run_interpolation) can use it without re-encountering "auto".
resolved_backend = backend
if (
backend == "auto"
and not best_config_df.empty
and "backend" in best_config_df.columns
):
resolved_backend = best_config_df.iloc[0]["backend"]
return {
"best_config_df": best_config_df,
"best_latencies": best_latencies,
"dgd_config": dgd_config,
"chosen_exp": chosen,
"task_configs": task_configs,
"resolved_backend": resolved_backend,
}
......
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""Unit tests for auto-backend resolution in the profiler pipeline."""
import pytest
from dynamo.profiler.rapid import _DEFAULT_NAIVE_BACKEND
# Module-level pytest markers: pytest applies every marker in this list to
# each test function defined in this module.
pytestmark = [
pytest.mark.unit,
pytest.mark.gpu_0,
pytest.mark.pre_merge,
pytest.mark.parallel,
]
def test_autoscale_sim_resolves_auto_to_default() -> None:
    """Verify that ``_run_autoscale_sim`` carries the 'auto' backend guard.

    This is a source-text check, not a behavioural one: the function's source
    is fetched with ``inspect.getsource`` and searched for the comparison
    against ``"auto"`` and for a reference to ``_DEFAULT_NAIVE_BACKEND``.
    The stated motivation is that ``BackendName('auto')`` is not a valid enum
    value, so 'auto' has to be resolved before a TaskConfig is built.
    """
    import inspect

    from dynamo.profiler.rapid import _run_autoscale_sim

    source_text = inspect.getsource(_run_autoscale_sim)

    # (required snippet, failure message) pairs, checked in order.
    expectations = (
        (
            'backend == "auto"',
            "_run_autoscale_sim must resolve backend='auto' before constructing TaskConfig",
        ),
        (
            "_DEFAULT_NAIVE_BACKEND",
            "_run_autoscale_sim must fall back to _DEFAULT_NAIVE_BACKEND when backend='auto'",
        ),
    )
    for snippet, failure_message in expectations:
        assert snippet in source_text, failure_message
def test_autoscale_sim_returns_resolved_backend() -> None:
    """Verify that ``_run_autoscale_sim`` mentions the 'resolved_backend' key.

    Source-text check only: the quoted key ``"resolved_backend"`` must appear
    in the function's source. The intent is that the result dict exposes the
    concrete backend so profile_sla.py can hand it to run_interpolation.
    """
    import inspect

    from dynamo.profiler.rapid import _run_autoscale_sim

    source_text = inspect.getsource(_run_autoscale_sim)
    key_is_present = '"resolved_backend"' in source_text

    assert (
        key_is_present
    ), "_run_autoscale_sim must return 'resolved_backend' in its result dict"
def test_naive_fallback_resolves_auto_to_default() -> None:
    """Verify that ``_run_naive_fallback`` carries the 'auto' backend guard.

    Source-text check only: the function's source must contain the comparison
    against ``"auto"`` and a reference to ``_DEFAULT_NAIVE_BACKEND``.

    Background: the naive path is taken when AIC doesn't support the
    model/system combo; it uses build_naive_generator_params and
    generate_backend_artifacts, which require a concrete backend string.
    """
    import inspect

    from dynamo.profiler.rapid import _run_naive_fallback

    source_text = inspect.getsource(_run_naive_fallback)

    compares_against_auto = 'backend == "auto"' in source_text
    assert (
        compares_against_auto
    ), "_run_naive_fallback must resolve backend='auto' before calling AIC helpers"

    references_default = "_DEFAULT_NAIVE_BACKEND" in source_text
    assert references_default
def test_default_sim_returns_resolved_backend() -> None:
    """Verify that ``_run_default_sim`` mentions the 'resolved_backend' key.

    Source-text check only: the quoted key ``"resolved_backend"`` must appear
    in the function's source.

    Background: when backend='auto', AIC expands to per-backend task configs
    and the winning row carries the concrete backend name; _run_default_sim is
    expected to surface it so run_interpolation never receives 'auto'.
    """
    import inspect

    from dynamo.profiler.rapid import _run_default_sim

    source_text = inspect.getsource(_run_default_sim)
    key_is_present = '"resolved_backend"' in source_text

    assert (
        key_is_present
    ), "_run_default_sim must return 'resolved_backend' in its result dict"
def test_default_naive_backend_is_concrete() -> None:
    """Ensure the fallback backend constant names a real backend, never 'auto'."""
    concrete_backends = ("vllm", "sglang", "trtllm")
    fallback = _DEFAULT_NAIVE_BACKEND

    assert fallback != "auto"
    assert fallback in concrete_backends
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment