Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
6be5c196
Unverified
Commit
6be5c196
authored
Aug 06, 2025
by
Graham King
Committed by
GitHub
Aug 06, 2025
Browse files
docs(dynamo-run): Remove vllm/sglang/trtllm engines from dynamo-run docs (#2332)
parent
c264018a
Changes
3
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
83 additions
and
309 deletions
+83
-309
docs/guides/dynamo_run.md
docs/guides/dynamo_run.md
+66
-305
launch/dynamo-run/src/flags.rs
launch/dynamo-run/src/flags.rs
+12
-2
launch/dynamo-run/src/main.rs
launch/dynamo-run/src/main.rs
+5
-2
No files found.
docs/guides/dynamo_run.md
View file @
6be5c196
This diff is collapsed.
Click to expand it.
launch/dynamo-run/src/flags.rs
View file @
6be5c196
...
@@ -74,7 +74,6 @@ pub struct Flags {
...
@@ -74,7 +74,6 @@ pub struct Flags {
/// Maximum number of batched tokens for KV routing
/// Maximum number of batched tokens for KV routing
/// Needed for informing the KV router
/// Needed for informing the KV router
/// TODO: derive from vllm args
/// NOTE: this is not actually used for now
/// NOTE: this is not actually used for now
#[arg(long,
default_value
=
"8192"
)]
#[arg(long,
default_value
=
"8192"
)]
pub
max_num_batched_tokens
:
Option
<
u32
>
,
pub
max_num_batched_tokens
:
Option
<
u32
>
,
...
@@ -103,10 +102,11 @@ pub struct Flags {
...
@@ -103,10 +102,11 @@ pub struct Flags {
#[arg(long)]
#[arg(long)]
pub
context_length
:
Option
<
u32
>
,
pub
context_length
:
Option
<
u32
>
,
/// KV cache block size (
vllm only
)
/// KV cache block size (
is this used? Maybe by Python vllm worker?
)
#[arg(long)]
#[arg(long)]
pub
kv_cache_block_size
:
Option
<
u32
>
,
pub
kv_cache_block_size
:
Option
<
u32
>
,
/// Mocker engine only.
/// Additional engine-specific arguments from a JSON file.
/// Additional engine-specific arguments from a JSON file.
/// Contains a mapping of parameter names to values.
/// Contains a mapping of parameter names to values.
#[arg(long)]
#[arg(long)]
...
@@ -203,6 +203,16 @@ impl Flags {
...
@@ -203,6 +203,16 @@ impl Flags {
// nothing to check here
// nothing to check here
}
}
}
}
match
out_opt
{
Output
::
Mocker
=>
{}
_
=>
{
if
self
.extra_engine_args
.is_some
()
{
anyhow
::
bail!
(
"`--extra-engine-args` is only for the mocker engine"
);
}
}
}
Ok
(())
Ok
(())
}
}
...
...
launch/dynamo-run/src/main.rs
View file @
6be5c196
...
@@ -3,7 +3,7 @@
...
@@ -3,7 +3,7 @@
use
std
::
env
;
use
std
::
env
;
use
clap
::
Parser
;
use
clap
::
{
CommandFactory
as
_
,
Parser
}
;
use
dynamo_llm
::
entrypoint
::
input
::
Input
;
use
dynamo_llm
::
entrypoint
::
input
::
Input
;
use
dynamo_run
::
Output
;
use
dynamo_run
::
Output
;
...
@@ -22,9 +22,11 @@ Example:
...
@@ -22,9 +22,11 @@ Example:
- cd target/debug
- cd target/debug
- ./dynamo-run Qwen/Qwen3-0.6B
- ./dynamo-run Qwen/Qwen3-0.6B
- OR: ./dynamo-run /data/models/Llama-3.2-1B-Instruct-Q4_K_M.gguf
- OR: ./dynamo-run /data/models/Llama-3.2-1B-Instruct-Q4_K_M.gguf
See `docs/guides/dynamo_run.md` in the repo for full details.
"#
;
"#
;
const
USAGE
:
&
str
=
"USAGE: dynamo-run in=[http|text|dyn://<path>|batch:<folder>] out=ENGINE_LIST|auto|dyn://<path> [--http-port 8080] [--model-path <path>] [--model-name <served-model-name>] [--model-config <hf-repo>]
[--tensor-parallel-size=1]
[--context-length=N] [--kv-cache-block-size=16]
[--num-nodes=1] [--node-rank=0] [--leader-addr=127.0.0.1:9876] [--base-gpu-id=0]
[--extra-engine-args=args.json] [--static-worker] [--router-mode random|round-robin|kv] [--kv-overlap-score-weight=2.0] [--
kv-gpu-cache-usage-weight=1.0] [--kv-waiting-requests-weight
=1.0] [--migration-limit=0] [--verbosity (-v|-vv)]"
;
const
USAGE
:
&
str
=
"USAGE: dynamo-run in=[http|text|dyn://<path>|batch:<folder>] out=ENGINE_LIST|auto|dyn://<path> [--http-port 8080] [--model-path <path>] [--model-name <served-model-name>] [--model-config <hf-repo>] [--context-length=N] [--kv-cache-block-size=16] [--extra-engine-args=args.json] [--static-worker] [--router-mode random|round-robin|kv] [--kv-overlap-score-weight=2.0] [--
router-temperature=0.0] [--use-kv-events] [--max-num-batched-tokens
=1.0] [--migration-limit=0] [--verbosity (-v|-vv)]"
;
fn
main
()
->
anyhow
::
Result
<
()
>
{
fn
main
()
->
anyhow
::
Result
<
()
>
{
// Set log level based on verbosity flag
// Set log level based on verbosity flag
...
@@ -71,6 +73,7 @@ async fn wrapper(runtime: dynamo_runtime::Runtime) -> anyhow::Result<()> {
...
@@ -71,6 +73,7 @@ async fn wrapper(runtime: dynamo_runtime::Runtime) -> anyhow::Result<()> {
let
usage
=
USAGE
.replace
(
"ENGINE_LIST"
,
&
engine_list
);
let
usage
=
USAGE
.replace
(
"ENGINE_LIST"
,
&
engine_list
);
println!
(
"{usage}"
);
println!
(
"{usage}"
);
println!
(
"{HELP}"
);
println!
(
"{HELP}"
);
dynamo_run
::
Flags
::
command
()
.print_long_help
()
.unwrap
();
return
Ok
(());
return
Ok
(());
}
else
if
args
[
0
]
==
"--version"
{
}
else
if
args
[
0
]
==
"--version"
{
if
let
Some
(
describe
)
=
option_env!
(
"VERGEN_GIT_DESCRIBE"
)
{
if
let
Some
(
describe
)
=
option_env!
(
"VERGEN_GIT_DESCRIBE"
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment