Unverified Commit 6be5c196 authored by Graham King's avatar Graham King Committed by GitHub
Browse files

docs(dynamo-run): Remove vllm/sglang/trtllm engines from dynamo-run docs (#2332)

parent c264018a
This diff is collapsed.
......@@ -74,7 +74,6 @@ pub struct Flags {
/// Maximum number of batched tokens for KV routing
/// Needed for informing the KV router
/// TODO: derive from vllm args
/// NOTE: this is not actually used for now
#[arg(long, default_value = "8192")]
pub max_num_batched_tokens: Option<u32>,
......@@ -103,10 +102,11 @@ pub struct Flags {
#[arg(long)]
pub context_length: Option<u32>,
/// KV cache block size (vllm only)
/// KV cache block size (is this used? Maybe by Python vllm worker?)
#[arg(long)]
pub kv_cache_block_size: Option<u32>,
/// Mocker engine only.
/// Additional engine-specific arguments from a JSON file.
/// Contains a mapping of parameter names to values.
#[arg(long)]
......@@ -203,6 +203,16 @@ impl Flags {
// nothing to check here
}
}
match out_opt {
Output::Mocker => {}
_ => {
if self.extra_engine_args.is_some() {
anyhow::bail!("`--extra-engine-args` is only for the mocker engine");
}
}
}
Ok(())
}
......
......@@ -3,7 +3,7 @@
use std::env;
use clap::Parser;
use clap::{CommandFactory as _, Parser};
use dynamo_llm::entrypoint::input::Input;
use dynamo_run::Output;
......@@ -22,9 +22,11 @@ Example:
- cd target/debug
- ./dynamo-run Qwen/Qwen3-0.6B
- OR: ./dynamo-run /data/models/Llama-3.2-1B-Instruct-Q4_K_M.gguf
See `docs/guides/dynamo_run.md` in the repo for full details.
"#;
const USAGE: &str = "USAGE: dynamo-run in=[http|text|dyn://<path>|batch:<folder>] out=ENGINE_LIST|auto|dyn://<path> [--http-port 8080] [--model-path <path>] [--model-name <served-model-name>] [--model-config <hf-repo>] [--tensor-parallel-size=1] [--context-length=N] [--kv-cache-block-size=16] [--num-nodes=1] [--node-rank=0] [--leader-addr=127.0.0.1:9876] [--base-gpu-id=0] [--extra-engine-args=args.json] [--static-worker] [--router-mode random|round-robin|kv] [--kv-overlap-score-weight=2.0] [--kv-gpu-cache-usage-weight=1.0] [--kv-waiting-requests-weight=1.0] [--migration-limit=0] [--verbosity (-v|-vv)]";
/// One-line usage summary printed alongside the help text.
///
/// The literal `ENGINE_LIST` is a placeholder: the caller replaces it with the
/// list of available engines before printing. Example values shown for each
/// flag should be valid for that flag's type (e.g. `--max-num-batched-tokens`
/// takes an unsigned integer, default 8192).
const USAGE: &str = "USAGE: dynamo-run in=[http|text|dyn://<path>|batch:<folder>] out=ENGINE_LIST|auto|dyn://<path> [--http-port 8080] [--model-path <path>] [--model-name <served-model-name>] [--model-config <hf-repo>] [--context-length=N] [--kv-cache-block-size=16] [--extra-engine-args=args.json] [--static-worker] [--router-mode random|round-robin|kv] [--kv-overlap-score-weight=2.0] [--router-temperature=0.0] [--use-kv-events] [--max-num-batched-tokens=8192] [--migration-limit=0] [--verbosity (-v|-vv)]";
fn main() -> anyhow::Result<()> {
// Set log level based on verbosity flag
......@@ -71,6 +73,7 @@ async fn wrapper(runtime: dynamo_runtime::Runtime) -> anyhow::Result<()> {
let usage = USAGE.replace("ENGINE_LIST", &engine_list);
println!("{usage}");
println!("{HELP}");
dynamo_run::Flags::command().print_long_help().unwrap();
return Ok(());
} else if args[0] == "--version" {
if let Some(describe) = option_env!("VERGEN_GIT_DESCRIBE") {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment