Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
fbe6bb0a
Unverified
Commit
fbe6bb0a
authored
Jan 05, 2026
by
Hongkuan Zhou
Committed by
GitHub
Jan 05, 2026
Browse files
feat: support PVC model cache in profiler (#5124)
Signed-off-by:
hongkuanz
<
hongkuanz@nvidia.com
>
parent
007c5b60
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
368 additions
and
96 deletions
+368
-96
benchmarks/profiler/utils/config_modifiers/protocol.py
benchmarks/profiler/utils/config_modifiers/protocol.py
+269
-2
benchmarks/profiler/utils/config_modifiers/sglang.py
benchmarks/profiler/utils/config_modifiers/sglang.py
+6
-30
benchmarks/profiler/utils/config_modifiers/trtllm.py
benchmarks/profiler/utils/config_modifiers/trtllm.py
+4
-30
benchmarks/profiler/utils/config_modifiers/vllm.py
benchmarks/profiler/utils/config_modifiers/vllm.py
+6
-28
benchmarks/profiler/utils/profiler_argparse.py
benchmarks/profiler/utils/profiler_argparse.py
+28
-2
benchmarks/profiler/utils/search_space_autogen.py
benchmarks/profiler/utils/search_space_autogen.py
+33
-4
tests/profiler/test_profile_sla_dryrun.py
tests/profiler/test_profile_sla_dryrun.py
+22
-0
No files found.
benchmarks/profiler/utils/config_modifiers/protocol.py
View file @
fbe6bb0a
...
@@ -13,8 +13,18 @@
...
@@ -13,8 +13,18 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
from
typing
import
Protocol
from
__future__
import
annotations
from
typing
import
Any
,
Protocol
from
benchmarks.profiler.utils.config
import
(
Config
,
Container
,
PodSpec
,
break_arguments
,
get_service_name_by_type
,
set_argument_value
,
)
from
benchmarks.profiler.utils.defaults
import
EngineType
from
benchmarks.profiler.utils.defaults
import
EngineType
from
dynamo.planner.defaults
import
SubComponentType
from
dynamo.planner.defaults
import
SubComponentType
...
@@ -87,9 +97,266 @@ class ConfigModifierProtocol(Protocol):
...
@@ -87,9 +97,266 @@ class ConfigModifierProtocol(Protocol):
...
...
@
classmethod
@
classmethod
def
update_model
(
cls
,
config
:
dict
,
model_name
:
str
)
->
dict
:
def
update_model
(
cls
,
config
:
dict
,
model_name
:
str
,
model_path
:
str
|
None
=
None
)
->
dict
:
...
...
@
classmethod
@
classmethod
def
update_image
(
cls
,
config
:
dict
,
image
:
str
)
->
dict
:
def
update_image
(
cls
,
config
:
dict
,
image
:
str
)
->
dict
:
...
...
@
classmethod
def
update_model_from_pvc
(
cls
,
config
:
dict
,
model_name
:
str
,
pvc_name
:
str
,
pvc_mount_path
:
str
,
pvc_path
:
str
,
)
->
dict
:
...
class BaseConfigModifier:
    """
    Shared helper base class for profiler config modifiers.

    This class intentionally lives in `protocol.py` so all backends can inherit
    common PVC + volumeMount + frontend CLI patching behavior.

    Subclasses set `BACKEND` (and override the `WORKER_*_ARG` names when their
    worker CLI differs) and inherit `update_model` / `update_model_from_pvc`.
    """

    # Subclasses should override, e.g. "vllm" / "sglang" / "trtllm"
    BACKEND: str = ""
    # Worker CLI arg name for model path / name. vLLM uses "--model"; others use "--model-path".
    WORKER_MODEL_PATH_ARG: str = "--model-path"
    WORKER_SERVED_MODEL_NAME_ARG: str = "--served-model-name"

    @classmethod
    def _normalize_model_path(cls, pvc_mount_path: str, pvc_path: str) -> str:
        """
        Join the PVC mount point and the path inside the PVC into one path.

        Trailing slashes of `pvc_mount_path` and leading slashes of `pvc_path`
        are dropped so exactly one "/" separates the two parts. `None` / empty
        inputs are treated as "".

        Returns:
            The mount path alone when `pvc_path` is empty, else "<mount>/<sub>".
        """
        raw_mount = pvc_mount_path or ""
        mount = raw_mount.rstrip("/")
        sub = (pvc_path or "").lstrip("/")
        if not sub:
            # A mount made only of slashes ("/") strips down to ""; keep it as
            # the filesystem root instead of returning an empty path.
            return mount or ("/" if raw_mount else "")
        return f"{mount}/{sub}"

    @classmethod
    def _ensure_spec_pvc(cls, cfg: Config, pvc_name: str) -> None:
        """
        Make sure `cfg.spec.pvcs` contains an entry for `pvc_name` with
        `create` set to False (existing entries are updated in place).
        """
        pvcs = getattr(cfg.spec, "pvcs", None)
        if pvcs is None:
            pvcs = []

        for pvc in pvcs:
            if isinstance(pvc, dict) and pvc.get("name") == pvc_name:
                # Ensure create is false (do not create PVC in profiling flows)
                pvc["create"] = False
                break
        else:
            pvcs.append({"name": pvc_name, "create": False})

        setattr(cfg.spec, "pvcs", pvcs)

    @classmethod
    def _ensure_service_volume_mount(
        cls, service: Any, pvc_name: str, mount_path: str
    ) -> None:
        """
        Make sure `service.volumeMounts` mounts `pvc_name` at `mount_path`.

        An existing mount with the same name has its `mountPoint` overwritten;
        otherwise a new entry is appended. A missing or non-list
        `volumeMounts` value is replaced by a fresh list.
        """
        volume_mounts = getattr(service, "volumeMounts", None)
        # Covers both None and any unexpected non-list value.
        if not isinstance(volume_mounts, list):
            volume_mounts = []

        for vm in volume_mounts:
            if isinstance(vm, dict) and vm.get("name") == pvc_name:
                vm["mountPoint"] = mount_path
                break
        else:
            volume_mounts.append({"name": pvc_name, "mountPoint": mount_path})

        setattr(service, "volumeMounts", volume_mounts)

    @classmethod
    def _update_container_args_preserving_shell_form(
        cls, container: Container, update_fn
    ) -> None:
        """
        Update container args while preserving a common shell form:
        - If `command` is `sh -c` and args is a single-string list, keep it that way.

        Args:
            container: container whose `args` are rewritten in place
            update_fn: callable taking the tokenized args (list of str) and
                returning the updated token list
        """
        import shlex

        original_args = container.args
        cmd = container.command or []
        is_shell_c = (
            isinstance(cmd, list)
            and len(cmd) >= 2
            and cmd[0] in ("/bin/sh", "sh")
            and cmd[1] == "-c"
        )
        is_single_string_args = (
            isinstance(original_args, list)
            and len(original_args) == 1
            and isinstance(original_args[0], str)
        )

        tokens = update_fn(break_arguments(original_args))

        if is_shell_c and is_single_string_args:
            # Keep as one string for `sh -c`.
            # NOTE(review): shlex.join quotes every token, so shell operators
            # in the original script (&&, |, $VAR) would presumably come back
            # quoted - confirm against how break_arguments tokenizes.
            container.args = [shlex.join(tokens)]
        else:
            container.args = tokens

    @classmethod
    def _update_frontend_cli(cls, cfg: Config, model_name: str, model_path: str) -> None:
        """
        Set `--model-name` / `--model-path` on the "Frontend" service CLI.

        Does nothing when the config has no "Frontend" service. Missing
        `extraPodSpec` / `mainContainer` objects are created on demand.
        """
        frontend = cfg.spec.services.get("Frontend")
        if not frontend:
            return

        if frontend.extraPodSpec is None:
            frontend.extraPodSpec = PodSpec(mainContainer=Container())
        if frontend.extraPodSpec.mainContainer is None:
            frontend.extraPodSpec.mainContainer = Container()
        c = frontend.extraPodSpec.mainContainer

        # If operator defaults are being used (no command/args), we must provide full CLI.
        if not c.command and not c.args:
            c.command = ["python3", "-m", "dynamo.frontend"]
            c.args = []

        def _patch(tokens: list[str]) -> list[str]:
            tokens = set_argument_value(tokens, "--model-name", model_name)
            tokens = set_argument_value(tokens, "--model-path", model_path)
            return tokens

        cls._update_container_args_preserving_shell_form(c, _patch)

    @classmethod
    def _worker_services(cls, cfg: Config) -> list[Any]:
        """
        Return the prefill / decode worker services present in `cfg`.

        Sub-components whose service name cannot be resolved, or whose
        service is not in `cfg.spec.services`, are skipped. Both
        `update_model()` and `update_model_from_pvc()` use this lookup so
        they agree on which workers exist.
        """
        import logging

        log = logging.getLogger(__name__)
        found: list[Any] = []
        for sct in (SubComponentType.PREFILL, SubComponentType.DECODE):
            try:
                svc_name = get_service_name_by_type(cfg, cls.BACKEND, sct)
            except Exception as exc:
                # Deliberately best-effort: a missing sub-component must not
                # abort the update, but leave a trace for debugging.
                log.debug(
                    "Skipping %s worker; service name lookup failed: %s", sct, exc
                )
                continue
            if svc_name in cfg.spec.services:
                found.append(cfg.spec.services[svc_name])
        return found

    @classmethod
    def _apply_model_update_to_cfg(
        cls,
        cfg: Config,
        model_name: str,
        model_path: str,
        patch_frontend: bool,
    ) -> None:
        """
        Apply model updates to a validated DGD config object.
        This is the shared implementation for both:
        - update_model()
        - update_model_from_pvc()

        Args:
            cfg: validated DGD config, modified in place
            model_name: value written to the worker served-model-name arg
            model_path: value written to the worker model-path arg
            patch_frontend: also patch the Frontend CLI when True
        """

        # Independent of the worker being patched, so defined once.
        def _patch(tokens: list[str]) -> list[str]:
            tokens = set_argument_value(tokens, cls.WORKER_MODEL_PATH_ARG, model_path)
            tokens = set_argument_value(
                tokens, cls.WORKER_SERVED_MODEL_NAME_ARG, model_name
            )
            return tokens

        # Update workers (prefill + decode) if present.
        for service in cls._worker_services(cfg):
            pod_spec = service.extraPodSpec
            if not pod_spec or not pod_spec.mainContainer:
                continue
            cls._update_container_args_preserving_shell_form(
                pod_spec.mainContainer, _patch
            )

        if patch_frontend:
            cls._update_frontend_cli(cfg, model_name=model_name, model_path=model_path)

    @classmethod
    def update_model(
        cls, config: dict, model_name: str, model_path: str | None = None
    ) -> dict:
        """
        Unified model update API.

        Args:
            config: DGD config dict
            model_name: served model name (HF id)
            model_path: model path inside container (if using PVC/local path). If omitted
                (None or empty), defaults to model_name (HF download case for workers).

        Returns:
            The updated config as a dict (`Config.model_dump()`).
        """
        cfg = Config.model_validate(config)
        # Treat "" like None: an empty model path is never a usable CLI value.
        if not model_path:
            model_path = model_name

        # Frontend requires a real filesystem path (validate_model_path checks isdir),
        # so only inject model args when `model_path` looks like a path.
        patch_frontend = isinstance(model_path, str) and model_path.startswith(
            ("/", ".")
        )

        cls._apply_model_update_to_cfg(
            cfg,
            model_name=model_name,
            model_path=model_path,
            patch_frontend=patch_frontend,
        )
        return cfg.model_dump()

    @classmethod
    def update_model_from_pvc(
        cls,
        config: dict,
        model_name: str,
        pvc_name: str,
        pvc_mount_path: str,
        pvc_path: str,
    ) -> dict:
        """
        Update a DGD config to serve `model_name`, with weights located in a mounted PVC.

        Common steps across backends:
        - Add `spec.pvcs`
        - Add `volumeMounts` for Frontend + prefill + decode (if present)
        - Patch Frontend CLI (`--model-name`, `--model-path`)
        - Patch worker CLI using the backend's `WORKER_*_ARG` names.

        Returns:
            The updated config dict. When `pvc_name` is empty the input
            `config` is returned unchanged (the model is NOT updated).
        """
        if not pvc_name:
            return config

        cfg = Config.model_validate(config)
        model_path = cls._normalize_model_path(pvc_mount_path, pvc_path)

        cls._ensure_spec_pvc(cfg, pvc_name)

        # Mount to Frontend + prefill + decode services if present.
        if "Frontend" in cfg.spec.services:
            cls._ensure_service_volume_mount(
                cfg.spec.services["Frontend"], pvc_name, pvc_mount_path
            )
        for service in cls._worker_services(cfg):
            cls._ensure_service_volume_mount(service, pvc_name, pvc_mount_path)

        # Patch workers + frontend with PVC model path.
        cls._apply_model_update_to_cfg(
            cfg,
            model_name=model_name,
            model_path=model_path,
            patch_frontend=True,
        )
        return cfg.model_dump()
benchmarks/profiler/utils/config_modifiers/sglang.py
View file @
fbe6bb0a
...
@@ -18,6 +18,7 @@ from benchmarks.profiler.utils.config import (
...
@@ -18,6 +18,7 @@ from benchmarks.profiler.utils.config import (
update_image
,
update_image
,
validate_and_get_worker_args
,
validate_and_get_worker_args
,
)
)
from
benchmarks.profiler.utils.config_modifiers.protocol
import
BaseConfigModifier
from
benchmarks.profiler.utils.defaults
import
(
from
benchmarks.profiler.utils.defaults
import
(
DEFAULT_MODEL_NAME
,
DEFAULT_MODEL_NAME
,
DYNAMO_RUN_DEFAULT_PORT
,
DYNAMO_RUN_DEFAULT_PORT
,
...
@@ -39,40 +40,14 @@ logger.addHandler(console_handler)
...
@@ -39,40 +40,14 @@ logger.addHandler(console_handler)
DEFAULT_SGLANG_CONFIG_PATH
=
"examples/backends/sglang/deploy/disagg.yaml"
DEFAULT_SGLANG_CONFIG_PATH
=
"examples/backends/sglang/deploy/disagg.yaml"
class
SGLangConfigModifier
:
class
SGLangConfigModifier
(
BaseConfigModifier
):
BACKEND
=
"sglang"
@
classmethod
@
classmethod
def
load_default_config
(
cls
)
->
dict
:
def
load_default_config
(
cls
)
->
dict
:
with
open
(
DEFAULT_SGLANG_CONFIG_PATH
,
"r"
)
as
f
:
with
open
(
DEFAULT_SGLANG_CONFIG_PATH
,
"r"
)
as
f
:
return
yaml
.
safe_load
(
f
)
return
yaml
.
safe_load
(
f
)
@
classmethod
def
update_model
(
cls
,
config
,
model_name
:
str
)
->
dict
:
# change the model to serve
cfg
=
Config
.
model_validate
(
config
)
# Update model for both prefill and decode workers
for
sub_component_type
in
[
SubComponentType
.
PREFILL
,
SubComponentType
.
DECODE
]:
try
:
worker_service
=
get_worker_service_from_config
(
cfg
,
backend
=
"sglang"
,
sub_component_type
=
sub_component_type
)
args
=
validate_and_get_worker_args
(
worker_service
,
backend
=
"sglang"
)
args
=
break_arguments
(
args
)
# Update both --model-path and --served-model-name
args
=
set_argument_value
(
args
,
"--model-path"
,
model_name
)
args
=
set_argument_value
(
args
,
"--served-model-name"
,
model_name
)
worker_service
.
extraPodSpec
.
mainContainer
.
args
=
args
except
(
ValueError
,
KeyError
):
# Service might not exist (e.g., in aggregated mode)
logger
.
debug
(
f
"Skipping
{
sub_component_type
}
service as it doesn't exist"
)
continue
return
cfg
.
model_dump
()
@
classmethod
@
classmethod
def
update_image
(
cls
,
config
,
image
:
str
)
->
dict
:
def
update_image
(
cls
,
config
,
image
:
str
)
->
dict
:
"""Update container image for all DGD services (frontend, planner, workers)."""
"""Update container image for all DGD services (frontend, planner, workers)."""
...
@@ -292,7 +267,8 @@ class SGLangConfigModifier:
...
@@ -292,7 +267,8 @@ class SGLangConfigModifier:
args
=
remove_valued_arguments
(
args
,
"--data-parallel-size"
)
args
=
remove_valued_arguments
(
args
,
"--data-parallel-size"
)
# 3. Enable --enable-dp-attention
# 3. Enable --enable-dp-attention
args
=
append_argument
(
args
,
"--enable-dp-attention"
)
if
"--enable-dp-attention"
not
in
args
:
args
=
append_argument
(
args
,
"--enable-dp-attention"
)
# 4. Set --ep=dep_size (expert parallelism size)
# 4. Set --ep=dep_size (expert parallelism size)
args
=
set_argument_value
(
args
,
"--ep"
,
str
(
dep_size
))
args
=
set_argument_value
(
args
,
"--ep"
,
str
(
dep_size
))
...
...
benchmarks/profiler/utils/config_modifiers/trtllm.py
View file @
fbe6bb0a
...
@@ -15,11 +15,11 @@ from benchmarks.profiler.utils.config import (
...
@@ -15,11 +15,11 @@ from benchmarks.profiler.utils.config import (
get_worker_service_from_config
,
get_worker_service_from_config
,
parse_override_engine_args
,
parse_override_engine_args
,
remove_valued_arguments
,
remove_valued_arguments
,
set_argument_value
,
setup_worker_service_resources
,
setup_worker_service_resources
,
update_image
,
update_image
,
validate_and_get_worker_args
,
validate_and_get_worker_args
,
)
)
from
benchmarks.profiler.utils.config_modifiers.protocol
import
BaseConfigModifier
from
benchmarks.profiler.utils.defaults
import
(
from
benchmarks.profiler.utils.defaults
import
(
DEFAULT_MODEL_NAME
,
DEFAULT_MODEL_NAME
,
DYNAMO_RUN_DEFAULT_PORT
,
DYNAMO_RUN_DEFAULT_PORT
,
...
@@ -41,40 +41,14 @@ logger.addHandler(console_handler)
...
@@ -41,40 +41,14 @@ logger.addHandler(console_handler)
DEFAULT_TRTLLM_CONFIG_PATH
=
"examples/backends/trtllm/deploy/disagg.yaml"
DEFAULT_TRTLLM_CONFIG_PATH
=
"examples/backends/trtllm/deploy/disagg.yaml"
class
TrtllmConfigModifier
:
class
TrtllmConfigModifier
(
BaseConfigModifier
):
BACKEND
=
"trtllm"
@
classmethod
@
classmethod
def
load_default_config
(
cls
)
->
dict
:
def
load_default_config
(
cls
)
->
dict
:
with
open
(
DEFAULT_TRTLLM_CONFIG_PATH
,
"r"
)
as
f
:
with
open
(
DEFAULT_TRTLLM_CONFIG_PATH
,
"r"
)
as
f
:
return
yaml
.
safe_load
(
f
)
return
yaml
.
safe_load
(
f
)
@
classmethod
def
update_model
(
cls
,
config
,
model_name
:
str
)
->
dict
:
# change the model to serve
cfg
=
Config
.
model_validate
(
config
)
# Update model for both prefill and decode workers
for
sub_component_type
in
[
SubComponentType
.
PREFILL
,
SubComponentType
.
DECODE
]:
try
:
worker_service
=
get_worker_service_from_config
(
cfg
,
backend
=
"trtllm"
,
sub_component_type
=
sub_component_type
)
args
=
validate_and_get_worker_args
(
worker_service
,
backend
=
"trtllm"
)
args
=
break_arguments
(
args
)
# Update both --model-path and --served-model-name
args
=
set_argument_value
(
args
,
"--model-path"
,
model_name
)
args
=
set_argument_value
(
args
,
"--served-model-name"
,
model_name
)
worker_service
.
extraPodSpec
.
mainContainer
.
args
=
args
except
(
ValueError
,
KeyError
):
# Service might not exist (e.g., in aggregated mode)
logger
.
debug
(
f
"Skipping
{
sub_component_type
}
service as it doesn't exist"
)
continue
return
cfg
.
model_dump
()
@
classmethod
@
classmethod
def
update_image
(
cls
,
config
,
image
:
str
)
->
dict
:
def
update_image
(
cls
,
config
,
image
:
str
)
->
dict
:
"""Update container image for all DGD services (frontend, planner, workers)."""
"""Update container image for all DGD services (frontend, planner, workers)."""
...
...
benchmarks/profiler/utils/config_modifiers/vllm.py
View file @
fbe6bb0a
...
@@ -16,6 +16,7 @@ from benchmarks.profiler.utils.config import (
...
@@ -16,6 +16,7 @@ from benchmarks.profiler.utils.config import (
update_image
,
update_image
,
validate_and_get_worker_args
,
validate_and_get_worker_args
,
)
)
from
benchmarks.profiler.utils.config_modifiers.protocol
import
BaseConfigModifier
from
benchmarks.profiler.utils.defaults
import
(
from
benchmarks.profiler.utils.defaults
import
(
DEFAULT_MODEL_NAME
,
DEFAULT_MODEL_NAME
,
DYNAMO_RUN_DEFAULT_PORT
,
DYNAMO_RUN_DEFAULT_PORT
,
...
@@ -37,39 +38,16 @@ logger.addHandler(console_handler)
...
@@ -37,39 +38,16 @@ logger.addHandler(console_handler)
DEFAULT_VLLM_CONFIG_PATH
=
"examples/backends/vllm/deploy/disagg.yaml"
DEFAULT_VLLM_CONFIG_PATH
=
"examples/backends/vllm/deploy/disagg.yaml"
class
VllmV1ConfigModifier
:
class
VllmV1ConfigModifier
(
BaseConfigModifier
):
BACKEND
=
"vllm"
# vllm uses a different arg for model path
WORKER_MODEL_PATH_ARG
=
"--model"
@
classmethod
@
classmethod
def
load_default_config
(
cls
)
->
dict
:
def
load_default_config
(
cls
)
->
dict
:
with
open
(
DEFAULT_VLLM_CONFIG_PATH
,
"r"
)
as
f
:
with
open
(
DEFAULT_VLLM_CONFIG_PATH
,
"r"
)
as
f
:
return
yaml
.
safe_load
(
f
)
return
yaml
.
safe_load
(
f
)
@
classmethod
def
update_model
(
cls
,
config
,
model_name
:
str
)
->
dict
:
# change the model to serve
cfg
=
Config
.
model_validate
(
config
)
# Update model for both prefill and decode workers
for
sub_component_type
in
[
SubComponentType
.
PREFILL
,
SubComponentType
.
DECODE
]:
try
:
worker_service
=
get_worker_service_from_config
(
cfg
,
backend
=
"vllm"
,
sub_component_type
=
sub_component_type
)
args
=
validate_and_get_worker_args
(
worker_service
,
backend
=
"vllm"
)
args
=
break_arguments
(
args
)
# Update --model (vllm uses --model instead of --model-path and --served-model-name)
args
=
set_argument_value
(
args
,
"--model"
,
model_name
)
worker_service
.
extraPodSpec
.
mainContainer
.
args
=
args
except
(
ValueError
,
KeyError
):
# Service might not exist (e.g., in aggregated mode)
logger
.
debug
(
f
"Skipping
{
sub_component_type
}
service as it doesn't exist"
)
continue
return
cfg
.
model_dump
()
@
classmethod
@
classmethod
def
update_image
(
cls
,
config
,
image
:
str
)
->
dict
:
def
update_image
(
cls
,
config
,
image
:
str
)
->
dict
:
"""Update container image for all DGD services (frontend, planner, workers)."""
"""Update container image for all DGD services (frontend, planner, workers)."""
...
...
benchmarks/profiler/utils/profiler_argparse.py
View file @
fbe6bb0a
...
@@ -66,7 +66,13 @@ def create_profiler_parser() -> argparse.Namespace:
...
@@ -66,7 +66,13 @@ def create_profiler_parser() -> argparse.Namespace:
deployment:
deployment:
namespace: String (kubernetes namespace, default: dynamo-sla-profiler)
namespace: String (kubernetes namespace, default: dynamo-sla-profiler)
service_name: String (service name, default: "")
service_name: String (service name, default: "")
model: String (model to serve, can be HF model name or local model path)
model: String (served model name)
model_cache_pvc_name: String (name of the PVC to mount the model cache,
if not provided, model must be HF name and will download from HF, default: "")
model_cache_pvc_path: String (path to the model cache in the PVC, default: "")
model_cache_pvc_mount_path: String (path to the model cache in the container,
note that the PVC must be mounted to the same path for the profiling job,
default: "/opt/model-cache")
engine:
engine:
backend: String (backend type, currently support [vllm, sglang, trtllm], default: vllm)
backend: String (backend type, currently support [vllm, sglang, trtllm], default: vllm)
config: String (path to the DynamoGraphDeployment config file, default: "")
config: String (path to the DynamoGraphDeployment config file, default: "")
...
@@ -122,7 +128,27 @@ def create_profiler_parser() -> argparse.Namespace:
...
@@ -122,7 +128,27 @@ def create_profiler_parser() -> argparse.Namespace:
"--model"
,
"--model"
,
type
=
str
,
type
=
str
,
default
=
config
.
get
(
"deployment"
,
{}).
get
(
"model"
,
""
),
default
=
config
.
get
(
"deployment"
,
{}).
get
(
"model"
,
""
),
help
=
"Model to serve, can be HF model name or local model path"
,
help
=
"Served model name"
,
)
parser
.
add_argument
(
"--model-cache-pvc-name"
,
type
=
str
,
default
=
config
.
get
(
"deployment"
,
{}).
get
(
"model_cache_pvc_name"
,
""
),
help
=
"Name of the PVC that contains the model weights. If not provided, args.model must be a HF model name and will download from HF"
,
)
parser
.
add_argument
(
"--model-cache-pvc-path"
,
type
=
str
,
default
=
config
.
get
(
"deployment"
,
{}).
get
(
"model_cache_pvc_path"
,
""
),
help
=
"Path to the model cache in the PVC"
,
)
parser
.
add_argument
(
"--model-cache-pvc-mount-path"
,
type
=
str
,
default
=
config
.
get
(
"deployment"
,
{}).
get
(
"model_cache_pvc_mount_path"
,
"/opt/model-cache"
),
help
=
"Path to the model cache in the container, note that the PVC must be mounted to the same path for the profiling job"
,
)
)
parser
.
add_argument
(
parser
.
add_argument
(
"--dgd-image"
,
"--dgd-image"
,
...
...
benchmarks/profiler/utils/search_space_autogen.py
View file @
fbe6bb0a
...
@@ -44,7 +44,17 @@ def auto_generate_search_space(args: argparse.Namespace) -> None:
...
@@ -44,7 +44,17 @@ def auto_generate_search_space(args: argparse.Namespace) -> None:
if
args
.
model
:
if
args
.
model
:
logger
.
info
(
f
"Updating model in DGD config file to
{
args
.
model
}
"
)
logger
.
info
(
f
"Updating model in DGD config file to
{
args
.
model
}
"
)
config
=
config_modifier
.
update_model
(
config
,
args
.
model
)
if
args
.
model_cache_pvc_name
:
config
=
config_modifier
.
update_model_from_pvc
(
config
,
args
.
model
,
args
.
model_cache_pvc_name
,
args
.
model_cache_pvc_mount_path
,
args
.
model_cache_pvc_path
,
)
else
:
# Non-PVC: workers download from HF, so model_path == model_name
config
=
config_modifier
.
update_model
(
config
,
args
.
model
,
args
.
model
)
if
args
.
dgd_image
:
if
args
.
dgd_image
:
logger
.
info
(
f
"Updating DGD image to
{
args
.
dgd_image
}
"
)
logger
.
info
(
f
"Updating DGD image to
{
args
.
dgd_image
}
"
)
config
=
config_modifier
.
update_image
(
config
,
args
.
dgd_image
)
config
=
config_modifier
.
update_image
(
config
,
args
.
dgd_image
)
...
@@ -58,11 +68,30 @@ def auto_generate_search_space(args: argparse.Namespace) -> None:
...
@@ -58,11 +68,30 @@ def auto_generate_search_space(args: argparse.Namespace) -> None:
# get model info and update args
# get model info and update args
model_info
:
ModelInfo
|
None
=
None
model_info
:
ModelInfo
|
None
=
None
if
not
args
.
model
:
model_name_or_path
=
""
if
args
.
model
:
# prioritize using model cache in PVC over downloading from HF
if
args
.
model_cache_pvc_name
:
# Keep consistent path normalization with config mutation logic
model_name_or_path
=
config_modifier
.
_normalize_model_path
(
args
.
model_cache_pvc_mount_path
,
args
.
model_cache_pvc_path
)
else
:
model_name_or_path
=
args
.
model
else
:
# get the model name from config
# get the model name from config
args
.
model
=
config_modifier
.
get_model_name
(
config
)
args
.
model
=
config_modifier
.
get_model_name
(
config
)
logger
.
info
(
f
"Getting model info for
{
args
.
model
}
..."
)
model_name_or_path
=
args
.
model
model_info
=
get_model_info
(
args
.
model
)
logger
.
info
(
f
"Getting model info for
{
args
.
model
}
at
{
model_name_or_path
}
..."
)
try
:
model_info
=
get_model_info
(
model_name_or_path
)
except
Exception
as
e
:
# Common in dry-run mode when the PVC isn't mounted locally.
logger
.
warning
(
f
"Failed to load model info from local path '
{
model_name_or_path
}
':
{
e
}
. "
f
"Trying to download from HF for '
{
args
.
model
}
'."
)
model_info
=
get_model_info
(
args
.
model
)
num_experts_str
=
(
num_experts_str
=
(
f
", num_experts=
{
model_info
.
num_experts
}
"
f
", num_experts=
{
model_info
.
num_experts
}
"
...
...
tests/profiler/test_profile_sla_dryrun.py
View file @
fbe6bb0a
...
@@ -74,6 +74,9 @@ class TestProfileSLADryRun:
...
@@ -74,6 +74,9 @@ class TestProfileSLADryRun:
self
.
num_gpus_per_node
=
8
self
.
num_gpus_per_node
=
8
self
.
deploy_after_profile
=
False
self
.
deploy_after_profile
=
False
self
.
pick_with_webui
=
False
self
.
pick_with_webui
=
False
self
.
model_cache_pvc_name
=
""
self
.
model_cache_pvc_path
=
""
self
.
model_cache_pvc_mount_path
=
"/opt/model-cache"
# Provide minimal model_info to avoid HF queries
# Provide minimal model_info to avoid HF queries
self
.
model_info
=
ModelInfo
(
self
.
model_info
=
ModelInfo
(
model_size
=
16384.0
,
model_size
=
16384.0
,
...
@@ -118,6 +121,9 @@ class TestProfileSLADryRun:
...
@@ -118,6 +121,9 @@ class TestProfileSLADryRun:
self
.
num_gpus_per_node
=
8
self
.
num_gpus_per_node
=
8
self
.
deploy_after_profile
=
False
self
.
deploy_after_profile
=
False
self
.
pick_with_webui
=
False
self
.
pick_with_webui
=
False
self
.
model_cache_pvc_name
=
""
self
.
model_cache_pvc_path
=
""
self
.
model_cache_pvc_mount_path
=
"/opt/model-cache"
self
.
model_info
=
ModelInfo
(
self
.
model_info
=
ModelInfo
(
model_size
=
16384.0
,
model_size
=
16384.0
,
architecture
=
"TestArchitecture"
,
architecture
=
"TestArchitecture"
,
...
@@ -183,6 +189,9 @@ class TestProfileSLADryRun:
...
@@ -183,6 +189,9 @@ class TestProfileSLADryRun:
self
.
num_gpus_per_node
=
8
self
.
num_gpus_per_node
=
8
self
.
deploy_after_profile
=
False
self
.
deploy_after_profile
=
False
self
.
pick_with_webui
=
False
self
.
pick_with_webui
=
False
self
.
model_cache_pvc_name
=
""
self
.
model_cache_pvc_path
=
""
self
.
model_cache_pvc_mount_path
=
"/opt/model-cache"
self
.
model_info
=
ModelInfo
(
self
.
model_info
=
ModelInfo
(
model_size
=
16384.0
,
model_size
=
16384.0
,
architecture
=
"TestArchitecture"
,
architecture
=
"TestArchitecture"
,
...
@@ -237,6 +246,10 @@ class TestProfileSLADryRun:
...
@@ -237,6 +246,10 @@ class TestProfileSLADryRun:
self
.
num_gpus_per_node
=
8
self
.
num_gpus_per_node
=
8
self
.
deploy_after_profile
=
False
self
.
deploy_after_profile
=
False
self
.
pick_with_webui
=
False
self
.
pick_with_webui
=
False
# Added in newer profiler versions; keep Args compatible with search_space_autogen
self
.
model_cache_pvc_name
=
""
self
.
model_cache_pvc_path
=
""
self
.
model_cache_pvc_mount_path
=
"/opt/model-cache"
self
.
model_info
=
ModelInfo
(
self
.
model_info
=
ModelInfo
(
model_size
=
65536.0
,
model_size
=
65536.0
,
architecture
=
"TestMoEArchitecture"
,
architecture
=
"TestMoEArchitecture"
,
...
@@ -315,6 +328,9 @@ class TestProfileSLADryRun:
...
@@ -315,6 +328,9 @@ class TestProfileSLADryRun:
self
.
deploy_after_profile
=
False
self
.
deploy_after_profile
=
False
self
.
pick_with_webui
=
False
self
.
pick_with_webui
=
False
self
.
enable_gpu_discovery
=
True
self
.
enable_gpu_discovery
=
True
self
.
model_cache_pvc_name
=
""
self
.
model_cache_pvc_path
=
""
self
.
model_cache_pvc_mount_path
=
"/opt/model-cache"
return
Args
()
return
Args
()
...
@@ -383,6 +399,9 @@ class TestProfileSLADryRun:
...
@@ -383,6 +399,9 @@ class TestProfileSLADryRun:
self
.
deploy_after_profile
=
False
self
.
deploy_after_profile
=
False
self
.
pick_with_webui
=
False
self
.
pick_with_webui
=
False
self
.
enable_gpu_discovery
=
True
self
.
enable_gpu_discovery
=
True
self
.
model_cache_pvc_name
=
""
self
.
model_cache_pvc_path
=
""
self
.
model_cache_pvc_mount_path
=
"/opt/model-cache"
return
Args
()
return
Args
()
...
@@ -451,6 +470,9 @@ class TestProfileSLADryRun:
...
@@ -451,6 +470,9 @@ class TestProfileSLADryRun:
self
.
deploy_after_profile
=
False
self
.
deploy_after_profile
=
False
self
.
pick_with_webui
=
False
self
.
pick_with_webui
=
False
self
.
enable_gpu_discovery
=
True
self
.
enable_gpu_discovery
=
True
self
.
model_cache_pvc_name
=
""
self
.
model_cache_pvc_path
=
""
self
.
model_cache_pvc_mount_path
=
"/opt/model-cache"
return
Args
()
return
Args
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment