"lib/bindings/python/vscode:/vscode.git/clone" did not exist on "3d7182b80ef11562c5882137287c1f0f268b56a4"
Unverified commit fbe6bb0a, authored by Hongkuan Zhou and committed by GitHub
Browse files

feat: support PVC model cache in profiler (#5124)


Signed-off-by: hongkuanz <hongkuanz@nvidia.com>
parent 007c5b60
...@@ -13,8 +13,18 @@ ...@@ -13,8 +13,18 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from typing import Protocol from __future__ import annotations
from typing import Any, Protocol
from benchmarks.profiler.utils.config import (
Config,
Container,
PodSpec,
break_arguments,
get_service_name_by_type,
set_argument_value,
)
from benchmarks.profiler.utils.defaults import EngineType from benchmarks.profiler.utils.defaults import EngineType
from dynamo.planner.defaults import SubComponentType from dynamo.planner.defaults import SubComponentType
...@@ -87,9 +97,266 @@ class ConfigModifierProtocol(Protocol): ...@@ -87,9 +97,266 @@ class ConfigModifierProtocol(Protocol):
... ...
@classmethod @classmethod
def update_model(cls, config: dict, model_name: str) -> dict: def update_model(
cls, config: dict, model_name: str, model_path: str | None = None
) -> dict:
... ...
@classmethod @classmethod
def update_image(cls, config: dict, image: str) -> dict: def update_image(cls, config: dict, image: str) -> dict:
... ...
# Protocol hook: rewrite a DGD config so that `model_name` is served from
# weights stored on a PersistentVolumeClaim instead of being downloaded.
#
#   config:         DGD config as a plain dict.
#   model_name:     served model name.
#   pvc_name:       name of the PVC holding the weights.
#   pvc_mount_path: path at which the PVC is mounted inside the container.
#   pvc_path:       location of the model inside the PVC; the shared
#                   BaseConfigModifier implementation in this file joins it
#                   onto `pvc_mount_path` to form the in-container model path.
#
# Returns the updated config as a dict.
@classmethod
def update_model_from_pvc(
cls,
config: dict,
model_name: str,
pvc_name: str,
pvc_mount_path: str,
pvc_path: str,
) -> dict:
...
class BaseConfigModifier:
    """
    Shared helper base class for profiler config modifiers.

    This class intentionally lives in `protocol.py` so all backends can inherit
    common PVC + volumeMount + frontend CLI patching behavior.
    """

    # Subclasses should override, e.g. "vllm" / "sglang" / "trtllm"
    BACKEND: str = ""
    # Worker CLI arg name for model path / name. vLLM uses "--model"; others use "--model-path".
    WORKER_MODEL_PATH_ARG: str = "--model-path"
    WORKER_SERVED_MODEL_NAME_ARG: str = "--served-model-name"

    @classmethod
    def _normalize_model_path(cls, pvc_mount_path: str, pvc_path: str) -> str:
        """
        Join the PVC mount path and the model's path inside the PVC.

        Trailing slashes on the mount path and leading slashes on the sub-path
        are dropped so exactly one "/" separates the two parts.

        Args:
            pvc_mount_path: where the PVC is mounted in the container.
            pvc_path: model location relative to the PVC root (may be empty).

        Returns:
            The in-container model path. With an empty `pvc_path` this is the
            mount path itself; a mount path consisting only of slashes stays
            "/" instead of collapsing to an empty string.
        """
        raw_mount = pvc_mount_path or ""
        mount = raw_mount.rstrip("/")
        sub = (pvc_path or "").lstrip("/")
        if not sub:
            # rstrip("/") turns a root mount ("/") into ""; keep it as "/".
            return mount or ("/" if raw_mount else "")
        return f"{mount}/{sub}"

    @classmethod
    def _ensure_spec_pvc(cls, cfg: Config, pvc_name: str) -> None:
        """
        Make sure `cfg.spec.pvcs` references `pvc_name` with `create: False`.

        An existing dict entry with the same name is updated in place;
        otherwise a new entry is appended. The (possibly new) list is written
        back to `cfg.spec.pvcs`.
        """
        # `pvcs` is read/written dynamically (presumably it is not a declared
        # field on the spec model - TODO confirm), hence getattr/setattr.
        pvcs = getattr(cfg.spec, "pvcs", None)
        if pvcs is None:
            pvcs = []

        for pvc in pvcs:
            if isinstance(pvc, dict) and pvc.get("name") == pvc_name:
                # Ensure create is false (do not create PVC in profiling flows)
                pvc["create"] = False
                break
        else:
            pvcs.append({"name": pvc_name, "create": False})

        setattr(cfg.spec, "pvcs", pvcs)

    @classmethod
    def _ensure_service_volume_mount(
        cls, service: Any, pvc_name: str, mount_path: str
    ) -> None:
        """
        Make sure `service.volumeMounts` mounts `pvc_name` at `mount_path`.

        An existing dict entry for the PVC gets its `mountPoint` overwritten;
        otherwise a new entry is appended. A missing or non-list
        `volumeMounts` value is replaced by a fresh list.
        """
        volume_mounts = getattr(service, "volumeMounts", None)
        # Covers both "attribute missing / None" and "unexpected type".
        if not isinstance(volume_mounts, list):
            volume_mounts = []

        for vm in volume_mounts:
            if isinstance(vm, dict) and vm.get("name") == pvc_name:
                vm["mountPoint"] = mount_path
                break
        else:
            volume_mounts.append({"name": pvc_name, "mountPoint": mount_path})

        setattr(service, "volumeMounts", volume_mounts)

    @classmethod
    def _update_container_args_preserving_shell_form(
        cls, container: Container, update_fn
    ) -> None:
        """
        Update container args while preserving a common shell form:
        - If `command` is `sh -c` and args is a single-string list, keep it that way.

        Args:
            container: container whose `args` are rewritten in place.
            update_fn: callable that receives the tokenized args and returns
                the updated token list.
        """
        original_args = container.args
        cmd = container.command or []

        is_shell_c = (
            isinstance(cmd, list)
            and len(cmd) >= 2
            and cmd[0] in ("/bin/sh", "sh")
            and cmd[1] == "-c"
        )
        is_single_string_args = (
            isinstance(original_args, list)
            and len(original_args) == 1
            and isinstance(original_args[0], str)
        )

        tokens = update_fn(break_arguments(original_args))

        if is_shell_c and is_single_string_args:
            # Keep as one string for `sh -c`
            # NOTE(review): shlex.join quotes shell metacharacters, so this
            # assumes the script is a single plain command - confirm.
            import shlex

            container.args = [shlex.join(tokens)]
        else:
            container.args = tokens

    @classmethod
    def _update_frontend_cli(
        cls, cfg: Config, model_name: str, model_path: str
    ) -> None:
        """
        Set `--model-name` / `--model-path` on the "Frontend" service CLI.

        Does nothing when the config has no "Frontend" service. Missing
        `extraPodSpec` / `mainContainer` objects are created on demand.
        """
        frontend = cfg.spec.services.get("Frontend")
        if not frontend:
            return

        if frontend.extraPodSpec is None:
            frontend.extraPodSpec = PodSpec(mainContainer=Container())
        if frontend.extraPodSpec.mainContainer is None:
            frontend.extraPodSpec.mainContainer = Container()

        c = frontend.extraPodSpec.mainContainer

        # If operator defaults are being used (no command/args), we must provide full CLI.
        if not c.command and not c.args:
            c.command = ["python3", "-m", "dynamo.frontend"]
            c.args = []

        def _patch(tokens: list[str]) -> list[str]:
            tokens = set_argument_value(tokens, "--model-name", model_name)
            tokens = set_argument_value(tokens, "--model-path", model_path)
            return tokens

        cls._update_container_args_preserving_shell_form(c, _patch)

    @classmethod
    def _worker_service_names(cls, cfg: Config) -> list[str]:
        """
        Return the names of the prefill/decode worker services present in `cfg`.

        Shared by the CLI patching and the volume-mount code so both tolerate
        a deployment that lacks one of the two worker kinds in the same way.
        """
        names: list[str] = []
        for sct in (SubComponentType.PREFILL, SubComponentType.DECODE):
            try:
                svc_name = get_service_name_by_type(cfg, cls.BACKEND, sct)
            except Exception:
                # Best effort: a failed lookup is treated as "service absent".
                continue
            if svc_name in cfg.spec.services:
                names.append(svc_name)
        return names

    @classmethod
    def _apply_model_update_to_cfg(
        cls,
        cfg: Config,
        model_name: str,
        model_path: str,
        patch_frontend: bool,
    ) -> None:
        """
        Apply model updates to a validated DGD config object.

        This is the shared implementation for both:
        - update_model()
        - update_model_from_pvc()

        Args:
            cfg: validated config, modified in place.
            model_name: value for the worker served-model-name argument.
            model_path: value for the worker model-path argument.
            patch_frontend: also patch the Frontend CLI when true.
        """

        # Loop-invariant, so it is built once rather than per service.
        def _patch(tokens: list[str]) -> list[str]:
            tokens = set_argument_value(
                tokens, cls.WORKER_MODEL_PATH_ARG, model_path
            )
            tokens = set_argument_value(
                tokens, cls.WORKER_SERVED_MODEL_NAME_ARG, model_name
            )
            return tokens

        # Update workers (prefill + decode) if present.
        for svc_name in cls._worker_service_names(cfg):
            service = cfg.spec.services[svc_name]
            if not service.extraPodSpec or not service.extraPodSpec.mainContainer:
                continue
            cls._update_container_args_preserving_shell_form(
                service.extraPodSpec.mainContainer, _patch
            )

        if patch_frontend:
            cls._update_frontend_cli(cfg, model_name=model_name, model_path=model_path)

    @classmethod
    def update_model(
        cls, config: dict, model_name: str, model_path: str | None = None
    ) -> dict:
        """
        Unified model update API.

        Args:
            config: DGD config dict
            model_name: served model name (HF id)
            model_path: model path inside container (if using PVC/local path). If omitted,
                defaults to model_name (HF download case for workers).

        Returns:
            The updated config as a dict.
        """
        cfg = Config.model_validate(config)
        if model_path is None:
            model_path = model_name

        # Frontend requires a real filesystem path (validate_model_path checks isdir),
        # so only inject model args when `model_path` looks like a path.
        patch_frontend = isinstance(model_path, str) and model_path.startswith(
            ("/", ".")
        )

        cls._apply_model_update_to_cfg(
            cfg,
            model_name=model_name,
            model_path=model_path,
            patch_frontend=patch_frontend,
        )
        return cfg.model_dump()

    @classmethod
    def update_model_from_pvc(
        cls,
        config: dict,
        model_name: str,
        pvc_name: str,
        pvc_mount_path: str,
        pvc_path: str,
    ) -> dict:
        """
        Update a DGD config to serve `model_name`, with weights located in a mounted PVC.

        Common steps across backends:
        - Add `spec.pvcs`
        - Add `volumeMounts` for Frontend + prefill + decode (if present)
        - Patch Frontend CLI (`--model-name`, `--model-path`)
        - Patch worker CLI using the backend's model-path / served-name args.

        Returns:
            The updated config as a dict, or `config` unchanged when
            `pvc_name` is empty.
        """
        if not pvc_name:
            return config

        cfg = Config.model_validate(config)
        model_path = cls._normalize_model_path(pvc_mount_path, pvc_path)

        cls._ensure_spec_pvc(cfg, pvc_name)

        # Mount to Frontend + prefill + decode services if present.
        mount_targets = cls._worker_service_names(cfg)
        if "Frontend" in cfg.spec.services:
            mount_targets.insert(0, "Frontend")
        for svc_name in mount_targets:
            cls._ensure_service_volume_mount(
                cfg.spec.services[svc_name], pvc_name, pvc_mount_path
            )

        # Patch workers + frontend with PVC model path.
        cls._apply_model_update_to_cfg(
            cfg,
            model_name=model_name,
            model_path=model_path,
            patch_frontend=True,
        )
        return cfg.model_dump()
...@@ -18,6 +18,7 @@ from benchmarks.profiler.utils.config import ( ...@@ -18,6 +18,7 @@ from benchmarks.profiler.utils.config import (
update_image, update_image,
validate_and_get_worker_args, validate_and_get_worker_args,
) )
from benchmarks.profiler.utils.config_modifiers.protocol import BaseConfigModifier
from benchmarks.profiler.utils.defaults import ( from benchmarks.profiler.utils.defaults import (
DEFAULT_MODEL_NAME, DEFAULT_MODEL_NAME,
DYNAMO_RUN_DEFAULT_PORT, DYNAMO_RUN_DEFAULT_PORT,
...@@ -39,40 +40,14 @@ logger.addHandler(console_handler) ...@@ -39,40 +40,14 @@ logger.addHandler(console_handler)
DEFAULT_SGLANG_CONFIG_PATH = "examples/backends/sglang/deploy/disagg.yaml" DEFAULT_SGLANG_CONFIG_PATH = "examples/backends/sglang/deploy/disagg.yaml"
class SGLangConfigModifier: class SGLangConfigModifier(BaseConfigModifier):
BACKEND = "sglang"
@classmethod @classmethod
def load_default_config(cls) -> dict: def load_default_config(cls) -> dict:
with open(DEFAULT_SGLANG_CONFIG_PATH, "r") as f: with open(DEFAULT_SGLANG_CONFIG_PATH, "r") as f:
return yaml.safe_load(f) return yaml.safe_load(f)
@classmethod
def update_model(cls, config, model_name: str) -> dict:
    """
    Point the SGLang prefill and decode workers at `model_name`.

    Both `--model-path` and `--served-model-name` are set to `model_name` on
    every worker that can be found; workers that cannot be found are skipped.
    Returns the updated config as a dict.
    """
    cfg = Config.model_validate(config)

    for sub_component_type in (SubComponentType.PREFILL, SubComponentType.DECODE):
        try:
            worker_service = get_worker_service_from_config(
                cfg, backend="sglang", sub_component_type=sub_component_type
            )
            tokens = break_arguments(
                validate_and_get_worker_args(worker_service, backend="sglang")
            )
            # Model path and served name both follow the requested model.
            for flag in ("--model-path", "--served-model-name"):
                tokens = set_argument_value(tokens, flag, model_name)
            worker_service.extraPodSpec.mainContainer.args = tokens
        except (ValueError, KeyError):
            # Service might not exist (e.g., in aggregated mode)
            logger.debug(
                f"Skipping {sub_component_type} service as it doesn't exist"
            )

    return cfg.model_dump()
@classmethod @classmethod
def update_image(cls, config, image: str) -> dict: def update_image(cls, config, image: str) -> dict:
"""Update container image for all DGD services (frontend, planner, workers).""" """Update container image for all DGD services (frontend, planner, workers)."""
...@@ -292,6 +267,7 @@ class SGLangConfigModifier: ...@@ -292,6 +267,7 @@ class SGLangConfigModifier:
args = remove_valued_arguments(args, "--data-parallel-size") args = remove_valued_arguments(args, "--data-parallel-size")
# 3. Enable --enable-dp-attention # 3. Enable --enable-dp-attention
if "--enable-dp-attention" not in args:
args = append_argument(args, "--enable-dp-attention") args = append_argument(args, "--enable-dp-attention")
# 4. Set --ep=dep_size (expert parallelism size) # 4. Set --ep=dep_size (expert parallelism size)
......
...@@ -15,11 +15,11 @@ from benchmarks.profiler.utils.config import ( ...@@ -15,11 +15,11 @@ from benchmarks.profiler.utils.config import (
get_worker_service_from_config, get_worker_service_from_config,
parse_override_engine_args, parse_override_engine_args,
remove_valued_arguments, remove_valued_arguments,
set_argument_value,
setup_worker_service_resources, setup_worker_service_resources,
update_image, update_image,
validate_and_get_worker_args, validate_and_get_worker_args,
) )
from benchmarks.profiler.utils.config_modifiers.protocol import BaseConfigModifier
from benchmarks.profiler.utils.defaults import ( from benchmarks.profiler.utils.defaults import (
DEFAULT_MODEL_NAME, DEFAULT_MODEL_NAME,
DYNAMO_RUN_DEFAULT_PORT, DYNAMO_RUN_DEFAULT_PORT,
...@@ -41,40 +41,14 @@ logger.addHandler(console_handler) ...@@ -41,40 +41,14 @@ logger.addHandler(console_handler)
DEFAULT_TRTLLM_CONFIG_PATH = "examples/backends/trtllm/deploy/disagg.yaml" DEFAULT_TRTLLM_CONFIG_PATH = "examples/backends/trtllm/deploy/disagg.yaml"
class TrtllmConfigModifier: class TrtllmConfigModifier(BaseConfigModifier):
BACKEND = "trtllm"
@classmethod @classmethod
def load_default_config(cls) -> dict: def load_default_config(cls) -> dict:
with open(DEFAULT_TRTLLM_CONFIG_PATH, "r") as f: with open(DEFAULT_TRTLLM_CONFIG_PATH, "r") as f:
return yaml.safe_load(f) return yaml.safe_load(f)
@classmethod
def update_model(cls, config, model_name: str) -> dict:
    """
    Point the TRT-LLM prefill and decode workers at `model_name`.

    Both `--model-path` and `--served-model-name` are set to `model_name` on
    every worker that can be found; workers that cannot be found are skipped.
    Returns the updated config as a dict.
    """
    cfg = Config.model_validate(config)

    for sub_component_type in (SubComponentType.PREFILL, SubComponentType.DECODE):
        try:
            worker_service = get_worker_service_from_config(
                cfg, backend="trtllm", sub_component_type=sub_component_type
            )
            tokens = break_arguments(
                validate_and_get_worker_args(worker_service, backend="trtllm")
            )
            # Model path and served name both follow the requested model.
            for flag in ("--model-path", "--served-model-name"):
                tokens = set_argument_value(tokens, flag, model_name)
            worker_service.extraPodSpec.mainContainer.args = tokens
        except (ValueError, KeyError):
            # Service might not exist (e.g., in aggregated mode)
            logger.debug(
                f"Skipping {sub_component_type} service as it doesn't exist"
            )

    return cfg.model_dump()
@classmethod @classmethod
def update_image(cls, config, image: str) -> dict: def update_image(cls, config, image: str) -> dict:
"""Update container image for all DGD services (frontend, planner, workers).""" """Update container image for all DGD services (frontend, planner, workers)."""
......
...@@ -16,6 +16,7 @@ from benchmarks.profiler.utils.config import ( ...@@ -16,6 +16,7 @@ from benchmarks.profiler.utils.config import (
update_image, update_image,
validate_and_get_worker_args, validate_and_get_worker_args,
) )
from benchmarks.profiler.utils.config_modifiers.protocol import BaseConfigModifier
from benchmarks.profiler.utils.defaults import ( from benchmarks.profiler.utils.defaults import (
DEFAULT_MODEL_NAME, DEFAULT_MODEL_NAME,
DYNAMO_RUN_DEFAULT_PORT, DYNAMO_RUN_DEFAULT_PORT,
...@@ -37,39 +38,16 @@ logger.addHandler(console_handler) ...@@ -37,39 +38,16 @@ logger.addHandler(console_handler)
DEFAULT_VLLM_CONFIG_PATH = "examples/backends/vllm/deploy/disagg.yaml" DEFAULT_VLLM_CONFIG_PATH = "examples/backends/vllm/deploy/disagg.yaml"
class VllmV1ConfigModifier: class VllmV1ConfigModifier(BaseConfigModifier):
BACKEND = "vllm"
# vllm uses a different arg for model path
WORKER_MODEL_PATH_ARG = "--model"
@classmethod @classmethod
def load_default_config(cls) -> dict: def load_default_config(cls) -> dict:
with open(DEFAULT_VLLM_CONFIG_PATH, "r") as f: with open(DEFAULT_VLLM_CONFIG_PATH, "r") as f:
return yaml.safe_load(f) return yaml.safe_load(f)
@classmethod
def update_model(cls, config, model_name: str) -> dict:
    """
    Point the vLLM prefill and decode workers at `model_name`.

    Only `--model` is set (vLLM uses it instead of `--model-path` and
    `--served-model-name`); workers that cannot be found are skipped.
    Returns the updated config as a dict.
    """
    cfg = Config.model_validate(config)

    for sub_component_type in (SubComponentType.PREFILL, SubComponentType.DECODE):
        try:
            worker_service = get_worker_service_from_config(
                cfg, backend="vllm", sub_component_type=sub_component_type
            )
            tokens = break_arguments(
                validate_and_get_worker_args(worker_service, backend="vllm")
            )
            worker_service.extraPodSpec.mainContainer.args = set_argument_value(
                tokens, "--model", model_name
            )
        except (ValueError, KeyError):
            # Service might not exist (e.g., in aggregated mode)
            logger.debug(
                f"Skipping {sub_component_type} service as it doesn't exist"
            )

    return cfg.model_dump()
@classmethod @classmethod
def update_image(cls, config, image: str) -> dict: def update_image(cls, config, image: str) -> dict:
"""Update container image for all DGD services (frontend, planner, workers).""" """Update container image for all DGD services (frontend, planner, workers)."""
......
...@@ -66,7 +66,13 @@ def create_profiler_parser() -> argparse.Namespace: ...@@ -66,7 +66,13 @@ def create_profiler_parser() -> argparse.Namespace:
deployment: deployment:
namespace: String (kubernetes namespace, default: dynamo-sla-profiler) namespace: String (kubernetes namespace, default: dynamo-sla-profiler)
service_name: String (service name, default: "") service_name: String (service name, default: "")
model: String (model to serve, can be HF model name or local model path) model: String (served model name)
model_cache_pvc_name: String (name of the PVC to mount the model cache,
if not provided, model must be HF name and will download from HF, default: "")
model_cache_pvc_path: String (path to the model cache in the PVC, default: "")
model_cache_pvc_mount_path: String (path to the model cache in the container,
note that the PVC must be mounted to the same path for the profiling job,
default: "/opt/model-cache")
engine: engine:
backend: String (backend type, currently support [vllm, sglang, trtllm], default: vllm) backend: String (backend type, currently support [vllm, sglang, trtllm], default: vllm)
config: String (path to the DynamoGraphDeployment config file, default: "") config: String (path to the DynamoGraphDeployment config file, default: "")
...@@ -122,7 +128,27 @@ def create_profiler_parser() -> argparse.Namespace: ...@@ -122,7 +128,27 @@ def create_profiler_parser() -> argparse.Namespace:
"--model", "--model",
type=str, type=str,
default=config.get("deployment", {}).get("model", ""), default=config.get("deployment", {}).get("model", ""),
help="Model to serve, can be HF model name or local model path", help="Served model name",
)
parser.add_argument(
"--model-cache-pvc-name",
type=str,
default=config.get("deployment", {}).get("model_cache_pvc_name", ""),
help="Name of the PVC that contains the model weights. If not provided, args.model must be a HF model name and will download from HF",
)
parser.add_argument(
"--model-cache-pvc-path",
type=str,
default=config.get("deployment", {}).get("model_cache_pvc_path", ""),
help="Path to the model cache in the PVC",
)
parser.add_argument(
"--model-cache-pvc-mount-path",
type=str,
default=config.get("deployment", {}).get(
"model_cache_pvc_mount_path", "/opt/model-cache"
),
help="Path to the model cache in the container, note that the PVC must be mounted to the same path for the profiling job",
) )
parser.add_argument( parser.add_argument(
"--dgd-image", "--dgd-image",
......
...@@ -44,7 +44,17 @@ def auto_generate_search_space(args: argparse.Namespace) -> None: ...@@ -44,7 +44,17 @@ def auto_generate_search_space(args: argparse.Namespace) -> None:
if args.model: if args.model:
logger.info(f"Updating model in DGD config file to {args.model}") logger.info(f"Updating model in DGD config file to {args.model}")
config = config_modifier.update_model(config, args.model) if args.model_cache_pvc_name:
config = config_modifier.update_model_from_pvc(
config,
args.model,
args.model_cache_pvc_name,
args.model_cache_pvc_mount_path,
args.model_cache_pvc_path,
)
else:
# Non-PVC: workers download from HF, so model_path == model_name
config = config_modifier.update_model(config, args.model, args.model)
if args.dgd_image: if args.dgd_image:
logger.info(f"Updating DGD image to {args.dgd_image}") logger.info(f"Updating DGD image to {args.dgd_image}")
config = config_modifier.update_image(config, args.dgd_image) config = config_modifier.update_image(config, args.dgd_image)
...@@ -58,10 +68,29 @@ def auto_generate_search_space(args: argparse.Namespace) -> None: ...@@ -58,10 +68,29 @@ def auto_generate_search_space(args: argparse.Namespace) -> None:
# get model info and update args # get model info and update args
model_info: ModelInfo | None = None model_info: ModelInfo | None = None
if not args.model: model_name_or_path = ""
if args.model:
# prioritize using model cache in PVC over downloading from HF
if args.model_cache_pvc_name:
# Keep consistent path normalization with config mutation logic
model_name_or_path = config_modifier._normalize_model_path(
args.model_cache_pvc_mount_path, args.model_cache_pvc_path
)
else:
model_name_or_path = args.model
else:
# get the model name from config # get the model name from config
args.model = config_modifier.get_model_name(config) args.model = config_modifier.get_model_name(config)
logger.info(f"Getting model info for {args.model}...") model_name_or_path = args.model
logger.info(f"Getting model info for {args.model} at {model_name_or_path}...")
try:
model_info = get_model_info(model_name_or_path)
except Exception as e:
# Common in dry-run mode when the PVC isn't mounted locally.
logger.warning(
f"Failed to load model info from local path '{model_name_or_path}': {e}. "
f"Trying to download from HF for '{args.model}'."
)
model_info = get_model_info(args.model) model_info = get_model_info(args.model)
num_experts_str = ( num_experts_str = (
......
...@@ -74,6 +74,9 @@ class TestProfileSLADryRun: ...@@ -74,6 +74,9 @@ class TestProfileSLADryRun:
self.num_gpus_per_node = 8 self.num_gpus_per_node = 8
self.deploy_after_profile = False self.deploy_after_profile = False
self.pick_with_webui = False self.pick_with_webui = False
self.model_cache_pvc_name = ""
self.model_cache_pvc_path = ""
self.model_cache_pvc_mount_path = "/opt/model-cache"
# Provide minimal model_info to avoid HF queries # Provide minimal model_info to avoid HF queries
self.model_info = ModelInfo( self.model_info = ModelInfo(
model_size=16384.0, model_size=16384.0,
...@@ -118,6 +121,9 @@ class TestProfileSLADryRun: ...@@ -118,6 +121,9 @@ class TestProfileSLADryRun:
self.num_gpus_per_node = 8 self.num_gpus_per_node = 8
self.deploy_after_profile = False self.deploy_after_profile = False
self.pick_with_webui = False self.pick_with_webui = False
self.model_cache_pvc_name = ""
self.model_cache_pvc_path = ""
self.model_cache_pvc_mount_path = "/opt/model-cache"
self.model_info = ModelInfo( self.model_info = ModelInfo(
model_size=16384.0, model_size=16384.0,
architecture="TestArchitecture", architecture="TestArchitecture",
...@@ -183,6 +189,9 @@ class TestProfileSLADryRun: ...@@ -183,6 +189,9 @@ class TestProfileSLADryRun:
self.num_gpus_per_node = 8 self.num_gpus_per_node = 8
self.deploy_after_profile = False self.deploy_after_profile = False
self.pick_with_webui = False self.pick_with_webui = False
self.model_cache_pvc_name = ""
self.model_cache_pvc_path = ""
self.model_cache_pvc_mount_path = "/opt/model-cache"
self.model_info = ModelInfo( self.model_info = ModelInfo(
model_size=16384.0, model_size=16384.0,
architecture="TestArchitecture", architecture="TestArchitecture",
...@@ -237,6 +246,10 @@ class TestProfileSLADryRun: ...@@ -237,6 +246,10 @@ class TestProfileSLADryRun:
self.num_gpus_per_node = 8 self.num_gpus_per_node = 8
self.deploy_after_profile = False self.deploy_after_profile = False
self.pick_with_webui = False self.pick_with_webui = False
# Added in newer profiler versions; keep Args compatible with search_space_autogen
self.model_cache_pvc_name = ""
self.model_cache_pvc_path = ""
self.model_cache_pvc_mount_path = "/opt/model-cache"
self.model_info = ModelInfo( self.model_info = ModelInfo(
model_size=65536.0, model_size=65536.0,
architecture="TestMoEArchitecture", architecture="TestMoEArchitecture",
...@@ -315,6 +328,9 @@ class TestProfileSLADryRun: ...@@ -315,6 +328,9 @@ class TestProfileSLADryRun:
self.deploy_after_profile = False self.deploy_after_profile = False
self.pick_with_webui = False self.pick_with_webui = False
self.enable_gpu_discovery = True self.enable_gpu_discovery = True
self.model_cache_pvc_name = ""
self.model_cache_pvc_path = ""
self.model_cache_pvc_mount_path = "/opt/model-cache"
return Args() return Args()
...@@ -383,6 +399,9 @@ class TestProfileSLADryRun: ...@@ -383,6 +399,9 @@ class TestProfileSLADryRun:
self.deploy_after_profile = False self.deploy_after_profile = False
self.pick_with_webui = False self.pick_with_webui = False
self.enable_gpu_discovery = True self.enable_gpu_discovery = True
self.model_cache_pvc_name = ""
self.model_cache_pvc_path = ""
self.model_cache_pvc_mount_path = "/opt/model-cache"
return Args() return Args()
...@@ -451,6 +470,9 @@ class TestProfileSLADryRun: ...@@ -451,6 +470,9 @@ class TestProfileSLADryRun:
self.deploy_after_profile = False self.deploy_after_profile = False
self.pick_with_webui = False self.pick_with_webui = False
self.enable_gpu_discovery = True self.enable_gpu_discovery = True
self.model_cache_pvc_name = ""
self.model_cache_pvc_path = ""
self.model_cache_pvc_mount_path = "/opt/model-cache"
return Args() return Args()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment