Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
a82acfa0
Unverified
Commit
a82acfa0
authored
Feb 12, 2026
by
jh-nv
Committed by
GitHub
Feb 12, 2026
Browse files
feat: Refactor frontend CLI configuration (#6201)
parent
948d6d85
Changes
5
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
721 additions
and
368 deletions
+721
-368
components/src/dynamo/common/configuration/utils.py
components/src/dynamo/common/configuration/utils.py
+40
-27
components/src/dynamo/common/tests/configuration/test_utils.py
...nents/src/dynamo/common/tests/configuration/test_utils.py
+82
-0
components/src/dynamo/frontend/frontend_args.py
components/src/dynamo/frontend/frontend_args.py
+502
-0
components/src/dynamo/frontend/main.py
components/src/dynamo/frontend/main.py
+93
-340
components/src/dynamo/frontend/vllm_processor.py
components/src/dynamo/frontend/vllm_processor.py
+4
-1
No files found.
components/src/dynamo/common/configuration/utils.py
View file @
a82acfa0
...
...
@@ -5,12 +5,16 @@
import
argparse
import
os
from
typing
import
Any
,
Optional
,
TypeVar
from
typing
import
Any
,
Callable
,
Optional
,
TypeVar
,
Union
T
=
TypeVar
(
"T"
)
def
env_or_default
(
env_var
:
str
,
default
:
T
)
->
T
:
def
env_or_default
(
env_var
:
str
,
default
:
T
,
value_type
:
Optional
[
Union
[
type
,
Callable
[...,
Any
]]]
=
None
,
)
->
T
:
"""
Get value from environment variable or return default.
...
...
@@ -19,32 +23,35 @@ def env_or_default(env_var: str, default: T) -> T:
Args:
env_var: Environment variable name (e.g., "DYN_NAMESPACE")
default: Default value if env var not set
value_type: If provided, use this type to convert the env value. If None, the type
is taken from type(default). Use value_type when default is None but you still
want the env value coerced (e.g. env_or_default("DYN_FOO", None, value_type=int)).
Returns:
Environment variable value (type-converted) or default
Examples:
>>> env_or_default("DYN_NAMESPACE", "test")
"test" # if DYN_NAMESPACE not set
>>> env_or_default("DYN_MIGRATION_LIMIT", 0)
5 # if DYN_MIGRATION_LIMIT="5"
"""
value
=
os
.
environ
.
get
(
env_var
)
if
value
is
None
:
return
default
# Type conversion based on default type
if
isinstance
(
default
,
bool
):
# No type info available: default=None and no explicit value_type.
if
value_type
is
None
and
default
is
None
:
return
value
# type: ignore[return-value]
# Prefer the explicit type if provided; otherwise derive from default
target_type
=
value_type
if
value_type
is
not
None
else
type
(
default
)
if
target_type
is
bool
:
return
value
.
lower
()
in
(
"true"
,
"1"
,
"yes"
,
"on"
)
# type: ignore
el
if
isinstance
(
default
,
int
)
:
if
target_type
is
int
:
return
int
(
value
)
# type: ignore
el
if
isinstance
(
default
,
float
)
:
if
target_type
is
float
:
return
float
(
value
)
# type: ignore
elif
isinstance
(
default
,
list
):
# Env vars for list options (e.g. DYN_CONNECTOR) are space-separated; downstream expects a list.
if
target_type
is
list
:
return
[
x
.
strip
()
for
x
in
value
.
split
()
if
x
.
strip
()]
# type: ignore
else
:
return
value
# type: ignore
# Fall back to calling the type/callable for custom validators (e.g., pathlib.Path)
return
target_type
(
value
)
if
callable
(
target_type
)
else
value
# type: ignore
def
add_argument
(
...
...
@@ -55,7 +62,7 @@ def add_argument(
default
:
Any
,
help
:
str
,
obsolete_flag
:
Optional
[
str
]
=
None
,
arg_type
:
Optional
[
type
]
=
str
,
arg_type
:
Optional
[
Union
[
type
,
Callable
[...,
Any
]]
]
=
str
,
**
kwargs
:
Any
,
)
->
None
:
"""
...
...
@@ -74,7 +81,12 @@ def add_argument(
arg_type: Type for the argument (default: str)
"""
arg_dest
=
_get_dest_name
(
flag_name
,
kwargs
.
get
(
"dest"
))
default_with_env
=
env_or_default
(
env_var
,
default
)
value_type_for_env
:
Optional
[
Union
[
type
,
Callable
[...,
Any
]]]
=
None
if
arg_type
is
not
None
and
isinstance
(
arg_type
,
type
):
value_type_for_env
=
arg_type
if
isinstance
(
default
,
list
)
and
(
arg_type
is
None
or
arg_type
is
str
):
value_type_for_env
=
None
default_with_env
=
env_or_default
(
env_var
,
default
,
value_type
=
value_type_for_env
)
names
=
[
flag_name
]
...
...
@@ -88,8 +100,9 @@ def add_argument(
"dest"
:
arg_dest
,
"default"
:
default_with_env
,
"help"
:
env_help
,
"type"
:
arg_type
,
}
if
arg_type
is
not
None
:
add_arg_opts
[
"type"
]
=
arg_type
kwargs
.
update
(
add_arg_opts
)
parser
.
add_argument
(
*
names
,
**
kwargs
)
...
...
@@ -114,15 +127,15 @@ def add_negatable_bool_argument(
default: Default value
help: Help text
"""
arg_dest
=
_get_dest_name
(
flag_name
,
dest
)
default_with_env
=
env_or_default
(
env_var
,
default
)
parser
.
add_argument
(
flag_name
,
dest
=
arg_dest
,
add_argument
(
parser
,
flag_name
=
flag_name
,
env_var
=
env_var
,
default
=
default
,
help
=
help
,
dest
=
dest
,
arg_type
=
None
,
action
=
argparse
.
BooleanOptionalAction
,
default
=
default_with_env
,
help
=
_build_help_message
(
help
,
env_var
,
default
),
)
...
...
components/src/dynamo/common/tests/configuration/test_utils.py
View file @
a82acfa0
...
...
@@ -7,12 +7,14 @@ import argparse
import
pytest
from
dynamo.common.configuration.utils
import
(
add_argument
,
add_negatable_bool_argument
,
env_or_default
,
)
pytestmark
=
[
pytest
.
mark
.
unit
,
pytest
.
mark
.
gpu_0
,
pytest
.
mark
.
pre_merge
,
]
...
...
@@ -88,6 +90,86 @@ class TestEnvOrDefault:
# Bool
assert
isinstance
(
env_or_default
(
"TEST_VAR"
,
True
),
bool
)
def test_none_default_with_no_value_type_returns_raw_env_value(self, monkeypatch):
    """With default=None and value_type=None the env string comes back untouched."""
    # Set the variable first: env_or_default reads os.environ at call time.
    monkeypatch.setenv("TEST_VAR", "env_value")

    raw = env_or_default("TEST_VAR", None, value_type=None)

    # No type information was supplied, so no coercion should have happened.
    assert isinstance(raw, str)
    assert raw == "env_value"
class TestAddArgument:
    """Checks for add_argument when arg_type is a callable and default is None."""

    @staticmethod
    def _non_empty_model_name(value: str) -> str:
        """Return the stripped value; reject empty or whitespace-only input."""
        trimmed = value.strip()
        if len(trimmed) == 0:
            raise argparse.ArgumentTypeError("model-name must be non-empty")
        return trimmed

    def _parser_with_model_name(self) -> argparse.ArgumentParser:
        """Build a parser exposing --model-name backed by TEST_MODEL_NAME.

        Must be called after the environment has been arranged, because
        add_argument resolves the env-derived default at registration time.
        """
        parser = argparse.ArgumentParser()
        add_argument(
            parser,
            flag_name="--model-name",
            env_var="TEST_MODEL_NAME",
            default=None,
            help="Model name",
            arg_type=self._non_empty_model_name,
        )
        return parser

    def test_callable_type_with_none_default_uses_env_and_validates(self, monkeypatch):
        """An env value is picked up and passed through the callable arg_type."""
        monkeypatch.setenv("TEST_MODEL_NAME", " model-A ")

        parsed = self._parser_with_model_name().parse_args([])

        # The validator strips surrounding whitespace.
        assert parsed.model_name == "model-A"

    def test_callable_type_with_none_default_uses_none_when_env_unset(
        self, monkeypatch
    ):
        """Without the env var the None default is kept as-is."""
        monkeypatch.delenv("TEST_MODEL_NAME", raising=False)

        parsed = self._parser_with_model_name().parse_args([])

        assert parsed.model_name is None

    def test_callable_type_with_invalid_env_value_fails_parse(self, monkeypatch):
        """A whitespace-only env value is rejected by the callable at parse time."""
        monkeypatch.setenv("TEST_MODEL_NAME", " ")
        parser = self._parser_with_model_name()

        # argparse reports type failures by exiting the process.
        with pytest.raises(SystemExit):
            parser.parse_args([])
class
TestAddNegatableBool
:
"""Test add_negatable_bool function."""
...
...
components/src/dynamo/frontend/frontend_args.py
0 → 100644
View file @
a82acfa0
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
import
argparse
import
os
import
pathlib
from
typing
import
Any
,
Dict
,
Optional
from
dynamo.common.config_dump
import
register_encoder
from
dynamo.common.configuration.arg_group
import
ArgGroup
from
dynamo.common.configuration.config_base
import
ConfigBase
from
dynamo.common.configuration.utils
import
(
add_argument
,
add_negatable_bool_argument
,
env_or_default
,
)
from
.
import
__version__
def validate_model_name(value: str) -> str:
    """Return ``value`` with surrounding whitespace removed.

    Intended for use as an argparse ``type`` callable.

    Raises:
        argparse.ArgumentTypeError: if ``value`` is not a string, is empty,
            or contains only whitespace.
    """
    # Non-string input is treated the same as an empty name.
    trimmed = value.strip() if isinstance(value, str) else ""
    if not trimmed:
        raise argparse.ArgumentTypeError(
            f"model-name must be a non-empty string, got: {value}"
        )
    return trimmed
def validate_model_path(value: str) -> str:
    """Return ``value`` unchanged when it names an existing directory.

    Intended for use as an argparse ``type`` callable.

    Raises:
        argparse.ArgumentTypeError: if ``os.path.isdir(value)`` is false.
    """
    if os.path.isdir(value):
        return value
    raise argparse.ArgumentTypeError(
        f"model-path must be a valid directory on disk, got: {value}"
    )
class FrontendConfig(ConfigBase):
    """Typed settings container for the Dynamo frontend.

    Each attribute corresponds to the ``dest`` of an option registered by
    ``FrontendArgGroup.add_arguments``. ``validate`` enforces the cross-field
    and range rules that a single argparse option cannot express.
    """

    interactive: bool
    kv_cache_block_size: Optional[int]

    # HTTP endpoint and optional TLS material (cert and key go together).
    http_host: str
    http_port: int
    tls_cert_path: Optional[pathlib.Path]
    tls_key_path: Optional[pathlib.Path]

    # Routing options.
    router_mode: str
    kv_overlap_score_weight: float
    router_temperature: float
    use_kv_events: bool  # dest of --kv-events / --no-kv-events
    router_ttl: float
    router_max_tree_size: int
    router_prune_target_ratio: float
    namespace: Optional[str] = None
    router_replica_sync: bool
    router_snapshot_threshold: int
    router_reset_states: bool
    durable_kv_events: bool
    router_track_active_blocks: bool  # dest of --track-active-blocks
    router_assume_kv_reuse: bool  # dest of --assume-kv-reuse
    router_track_output_blocks: bool  # dest of --track-output-blocks
    router_event_threads: int
    enforce_disagg: bool
    migration_limit: int

    # Busy-detection thresholds; None leaves the option unset.
    active_decode_blocks_threshold: Optional[float]
    active_prefill_tokens_threshold: Optional[int]
    active_prefill_tokens_threshold_frac: Optional[float]

    # Model identity.
    model_name: Optional[str]
    model_path: Optional[str]

    # Metrics and gRPC.
    metrics_prefix: Optional[str] = None
    kserve_grpc_server: bool
    grpc_metrics_port: int

    dump_config_to: Optional[str]
    store_kv: str
    request_plane: str
    event_plane: str
    chat_processor: str
    exp_python_factory: bool

    def validate(self) -> None:
        """Check cross-field and range constraints.

        Raises:
            ValueError: if exactly one of the TLS cert/key paths is set, or if
                ``migration_limit`` lies outside 0..4294967295.
        """
        cert_given = bool(self.tls_cert_path)
        key_given = bool(self.tls_key_path)
        # TLS needs both halves; exactly one being present is a misconfiguration.
        if cert_given != key_given:
            raise ValueError(
                "--tls-cert-path and --tls-key-path must be provided together"
            )

        limit = self.migration_limit
        # 4294967295 == 2**32 - 1, the largest unsigned 32-bit value.
        if limit > 4294967295 or limit < 0:
            raise ValueError(
                "--migration-limit must be between 0 and 4294967295 (0=disabled)"
            )
@register_encoder(FrontendConfig)
def _preprocess_for_encode_config(config: FrontendConfig) -> Dict[str, Any]:
    """Expose a FrontendConfig's instance attributes as a dict for encoding.

    The returned mapping is the instance's own attribute dictionary, not a copy.
    """
    attributes: Dict[str, Any] = vars(config)
    return attributes
class FrontendArgGroup(ArgGroup):
    """Frontend configuration parameters.

    Registers every frontend command-line option on an argparse parser. Most
    options go through the ``add_argument`` / ``add_negatable_bool_argument``
    helpers, which take an ``env_var`` name alongside the flag so a value can
    come from the environment as well as the command line.
    """

    def add_arguments(self, parser) -> None:
        """Add ``--version`` and the "Dynamo Frontend Options" group to ``parser``.

        Args:
            parser: The argparse parser to register options on.
        """
        # --version is attached to the parser itself, not to the option group.
        parser.add_argument(
            "--version", action="version", version=f"Dynamo Frontend {__version__}"
        )
        g = parser.add_argument_group("Dynamo Frontend Options")

        # Interactive needs -i short option; use raw add_argument with BooleanOptionalAction
        g.add_argument(
            "-i",
            "--interactive",
            dest="interactive",
            action=argparse.BooleanOptionalAction,
            default=env_or_default("DYN_INTERACTIVE", False),
            help="Interactive text chat.\nenv var: DYN_INTERACTIVE",
        )

        add_argument(
            g,
            flag_name="--kv-cache-block-size",
            env_var="DYN_KV_CACHE_BLOCK_SIZE",
            default=None,
            help="KV cache block size (u32).",
            arg_type=int,
        )

        # --- HTTP endpoint and TLS ---
        add_argument(
            g,
            flag_name="--http-host",
            env_var="DYN_HTTP_HOST",
            default="0.0.0.0",
            help="HTTP host for the engine (str).",
        )
        add_argument(
            g,
            flag_name="--http-port",
            env_var="DYN_HTTP_PORT",
            default=8000,
            help="HTTP port for the engine (u16).",
            arg_type=int,
        )
        # The two TLS paths must be given together; FrontendConfig.validate checks this.
        add_argument(
            g,
            flag_name="--tls-cert-path",
            env_var="DYN_TLS_CERT_PATH",
            default=None,
            help="TLS certificate path, PEM format.",
            arg_type=pathlib.Path,
        )
        add_argument(
            g,
            flag_name="--tls-key-path",
            env_var="DYN_TLS_KEY_PATH",
            default=None,
            help="TLS certificate key path, PEM format.",
            arg_type=pathlib.Path,
        )

        # --- Routing ---
        add_argument(
            g,
            flag_name="--router-mode",
            env_var="DYN_ROUTER_MODE",
            default="round-robin",
            help="How to route the request.",
            choices=["round-robin", "random", "kv"],
        )
        add_argument(
            g,
            flag_name="--kv-overlap-score-weight",
            env_var="DYN_KV_OVERLAP_SCORE_WEIGHT",
            default=1.0,
            help=(
                "KV Router: Weight for overlap score in worker selection. "
                "Higher values prioritize KV cache reuse."
            ),
            arg_type=float,
        )
        add_argument(
            g,
            flag_name="--router-temperature",
            env_var="DYN_ROUTER_TEMPERATURE",
            default=0.0,
            help=(
                "KV Router: Temperature for worker sampling via softmax. Higher values "
                "promote more randomness, and 0 fallbacks to deterministic."
            ),
            arg_type=float,
        )
        # Stored under use_kv_events rather than the flag-derived name.
        add_negatable_bool_argument(
            g,
            flag_name="--kv-events",
            env_var="DYN_KV_EVENTS",
            default=True,
            help=(
                "KV Router: Enable/disable KV events. Use --kv-events to enable "
                "(default, router receives cache state events from workers) or --no-kv-events "
                "to disable (router predicts cache state based on routing decisions)."
            ),
            dest="use_kv_events",
        )
        add_argument(
            g,
            flag_name="--router-ttl",
            env_var="DYN_ROUTER_TTL",
            default=120.0,
            help=(
                "KV Router: Time-to-live in seconds for blocks when KV events are disabled. "
                "Only used when --no-kv-events is set."
            ),
            arg_type=float,
        )
        add_argument(
            g,
            flag_name="--router-max-tree-size",
            env_var="DYN_ROUTER_MAX_TREE_SIZE",
            default=2**20,
            help=(
                "KV Router: Maximum tree size before pruning when KV events are disabled. "
                "Only used when --no-kv-events is set."
            ),
            arg_type=int,
        )
        add_argument(
            g,
            flag_name="--router-prune-target-ratio",
            env_var="DYN_ROUTER_PRUNE_TARGET_RATIO",
            default=0.8,
            help=(
                "KV Router: Target size ratio after pruning when KV events are disabled. "
                "Only used when --no-kv-events is set."
            ),
            arg_type=float,
        )
        add_argument(
            g,
            flag_name="--namespace",
            env_var="DYN_NAMESPACE",
            default=None,
            help=(
                "Dynamo namespace for model discovery scoping. If specified, models will "
                "only be discovered from this namespace. If not specified, discovers models "
                "from all namespaces (global discovery)."
            ),
        )
        add_negatable_bool_argument(
            g,
            flag_name="--router-replica-sync",
            env_var="DYN_ROUTER_REPLICA_SYNC",
            default=False,
            help=(
                "KV Router: Enable replica synchronization across multiple router instances. "
                "When true, routers will publish and subscribe to events to maintain "
                "consistent state."
            ),
        )
        add_argument(
            g,
            flag_name="--router-snapshot-threshold",
            env_var="DYN_ROUTER_SNAPSHOT_THRESHOLD",
            default=1000000,
            help=(
                "KV Router: Number of messages in stream before triggering a snapshot. "
            ),
            arg_type=int,
        )
        add_negatable_bool_argument(
            g,
            flag_name="--router-reset-states",
            env_var="DYN_ROUTER_RESET_STATES",
            default=False,
            help=(
                "KV Router: Reset router state on startup, purging stream and object store. "
                "By default, states are persisted. WARNING: This can affect existing router "
                "replicas."
            ),
        )
        add_negatable_bool_argument(
            g,
            flag_name="--durable-kv-events",
            env_var="DYN_DURABLE_KV_EVENTS",
            default=False,
            help=(
                "KV Router: Enable durable KV events using NATS JetStream instead of NATS Core. "
                "By default, the router uses the generic event plane (NATS Core or ZMQ) with "
                "local_indexer mode. Use this flag when you need durability and multi-replica "
                "consistency. Requires NATS with JetStream enabled."
            ),
        )
        # The next three flags omit the "router" prefix but store under router_* dests.
        add_negatable_bool_argument(
            g,
            flag_name="--track-active-blocks",
            env_var="DYN_TRACK_ACTIVE_BLOCKS",
            default=True,
            dest="router_track_active_blocks",
            help=(
                "KV Router: Track active blocks (blocks being used for ongoing generation). "
                "By default, active blocks are tracked for load balancing. "
            ),
        )
        add_negatable_bool_argument(
            g,
            flag_name="--assume-kv-reuse",
            env_var="DYN_ASSUME_KV_REUSE",
            default=True,
            dest="router_assume_kv_reuse",
            help=(
                "KV Router: When tracking active blocks, assume KV cache reuse. "
                "Use --no-assume-kv-reuse to generate random hashes instead (when KV cache reuse is not expected)."
            ),
        )
        add_negatable_bool_argument(
            g,
            flag_name="--track-output-blocks",
            env_var="DYN_ROUTER_TRACK_OUTPUT_BLOCKS",
            default=False,
            dest="router_track_output_blocks",
            help=(
                "KV Router: Track output blocks during generation. When enabled, the router adds "
                "placeholder blocks as tokens are generated and applies fractional decay based on "
                "progress toward expected_output_tokens."
            ),
        )
        add_argument(
            g,
            flag_name="--router-event-threads",
            env_var="DYN_ROUTER_EVENT_THREADS",
            default=1,
            help=(
                "KV Router: Number of event processing threads. When > 1, uses a concurrent radix tree with a thread pool for higher throughput."
            ),
            arg_type=int,
        )
        add_negatable_bool_argument(
            g,
            flag_name="--enforce-disagg",
            env_var="DYN_ENFORCE_DISAGG",
            default=False,
            help=(
                "Enforce disaggregated prefill-decode. When set, unactivated prefill router will "
                "return an error instead of falling back to decode-only mode."
            ),
        )
        # Range (0..4294967295) is enforced later by FrontendConfig.validate.
        add_argument(
            g,
            flag_name="--migration-limit",
            env_var="DYN_MIGRATION_LIMIT",
            default=0,
            help=(
                "Maximum number of times a request may be migrated to a different engine worker. "
                "When > 0, enables request migration on worker disconnect."
            ),
            arg_type=int,
        )

        # --- Busy-detection thresholds (all default to None) ---
        add_argument(
            g,
            flag_name="--active-decode-blocks-threshold",
            env_var="DYN_ACTIVE_DECODE_BLOCKS_THRESHOLD",
            default=None,
            help=(
                "Threshold percentage (0.0-1.0) for determining when a worker is considered busy "
                "based on KV cache block utilization. If not set, blocks-based busy detection is disabled."
            ),
            arg_type=float,
        )
        add_argument(
            g,
            flag_name="--active-prefill-tokens-threshold",
            env_var="DYN_ACTIVE_PREFILL_TOKENS_THRESHOLD",
            default=None,
            help=(
                "Literal token count threshold for determining when a worker is considered busy "
                "based on prefill token utilization. When active prefill tokens exceed this "
                "threshold, the worker is marked as busy. If not set, tokens-based busy detection is disabled."
            ),
            arg_type=int,
        )
        # NOTE(review): the help text says "Default 1.5" while the default passed
        # here is None - presumably 1.5 is applied downstream; confirm.
        add_argument(
            g,
            flag_name="--active-prefill-tokens-threshold-frac",
            env_var="DYN_ACTIVE_PREFILL_TOKENS_THRESHOLD_FRAC",
            default=None,
            help=(
                "Fraction of max_num_batched_tokens for busy detection. Worker is busy when "
                "active_prefill_tokens > frac * max_num_batched_tokens. Default 1.5 (disabled). "
                "Uses OR logic with --active-prefill-tokens-threshold."
            ),
            arg_type=float,
        )

        # --- Model identity; values pass through the validators defined in this module ---
        add_argument(
            g,
            flag_name="--model-name",
            env_var="DYN_MODEL_NAME",
            default=None,
            help="Model name as a string (e.g., 'Llama-3.2-1B-Instruct')",
            arg_type=validate_model_name,
        )
        add_argument(
            g,
            flag_name="--model-path",
            env_var="DYN_MODEL_PATH",
            default=None,
            help="Path to model directory on disk (e.g., /tmp/model_cache/llama3.2_1B/)",
            arg_type=validate_model_path,
        )

        # --- Metrics and gRPC ---
        add_argument(
            g,
            flag_name="--metrics-prefix",
            env_var="DYN_METRICS_PREFIX",
            default=None,
            help=(
                "Prefix for Dynamo frontend metrics. If unset, uses DYN_METRICS_PREFIX env var "
                "or 'dynamo_frontend'."
            ),
        )
        add_negatable_bool_argument(
            g,
            flag_name="--kserve-grpc-server",
            env_var="DYN_KSERVE_GRPC_SERVER",
            default=False,
            help="Start KServe gRPC server.",
        )
        add_argument(
            g,
            flag_name="--grpc-metrics-port",
            env_var="DYN_GRPC_METRICS_PORT",
            default=8788,
            help=(
                "HTTP metrics port for gRPC service (u16). Only used with --kserve-grpc-server. "
                "Defaults to 8788."
            ),
            arg_type=int,
        )

        add_argument(
            g,
            flag_name="--dump-config-to",
            env_var="DYN_DUMP_CONFIG_TO",
            default=None,
            help="Dump config to the specified file path.",
        )

        # --- Backend / transport selection (restricted via choices) ---
        add_argument(
            g,
            flag_name="--store-kv",
            env_var="DYN_STORE_KV",
            default="etcd",
            help=(
                "Which key-value backend to use: etcd, mem, file. Etcd uses the ETCD_* env vars "
                "(e.g. ETCD_ENDPOINTS) for connection details. File uses root dir from env var "
                "DYN_FILE_KV or defaults to $TMPDIR/dynamo_store_kv."
            ),
            choices=["etcd", "file", "mem"],
        )
        add_argument(
            g,
            flag_name="--request-plane",
            env_var="DYN_REQUEST_PLANE",
            default="tcp",
            help=(
                "Determines how requests are distributed from routers to workers. "
                "'tcp' is fastest [nats|http|tcp]"
            ),
            choices=["nats", "http", "tcp"],
        )
        add_argument(
            g,
            flag_name="--event-plane",
            env_var="DYN_EVENT_PLANE",
            default="nats",
            help="Determines how events are published [nats|zmq]",
            choices=["nats", "zmq"],
        )

        # --- Experimental options ---
        add_argument(
            g,
            flag_name="--chat-processor",
            env_var="DYN_CHAT_PROCESSOR",
            default="dynamo",
            help=(
                "[EXPERIMENTAL] When set to 'vllm', use local vllm for the pre and post "
                "processor."
            ),
            choices=["dynamo", "vllm"],
        )
        add_negatable_bool_argument(
            g,
            flag_name="--exp-python-factory",
            env_var="DYN_EXP_PYTHON_FACTORY",
            default=False,
            help=(
                "[EXPERIMENTAL] Enable Python-based engine factory. When set, engines will be "
                "created via a Python callback instead of the default Rust pipeline."
            ),
        )
components/src/dynamo/frontend/main.py
View file @
a82acfa0
This diff is collapsed.
Click to expand it.
components/src/dynamo/frontend/vllm_processor.py
View file @
a82acfa0
...
...
@@ -24,6 +24,7 @@ from vllm.v1.engine import EngineCoreOutput, EngineCoreRequest, FinishReason
from
vllm.v1.engine.input_processor
import
InputProcessor
from
vllm.v1.engine.output_processor
import
OutputProcessor
,
OutputProcessorOutput
from
dynamo.frontend.frontend_args
import
FrontendConfig
from
dynamo.llm
import
(
KvPushRouter
,
ModelCardInstanceId
,
...
...
@@ -367,10 +368,12 @@ class EngineFactory:
self
,
runtime
:
DistributedRuntime
,
router_config
:
RouterConfig
,
config
:
FrontendConfig
,
flags
:
Namespace
,
):
self
.
runtime
=
runtime
self
.
router_config
=
router_config
self
.
config
=
config
self
.
flags
=
flags
async
def
chat_engine_factory
(
...
...
@@ -444,7 +447,7 @@ class EngineFactory:
if
self
.
router_config
.
router_mode
==
RouterMode
.
KV
:
router
=
KvPushRouter
(
endpoint
=
generate_endpoint
,
block_size
=
self
.
flags
.
kv_cache_block_size
or
16
,
block_size
=
self
.
config
.
kv_cache_block_size
or
16
,
kv_router_config
=
self
.
router_config
.
kv_router_config
,
)
else
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment