Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
bbe82f18
Unverified
Commit
bbe82f18
authored
Feb 12, 2026
by
Graham King
Committed by
GitHub
Feb 12, 2026
Browse files
chore: Remove dynamo-run and mistral-rs engine (#6203)
Signed-off-by:
Graham King
<
grahamk@nvidia.com
>
parent
2c747d64
Changes
36
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
10 additions
and
1140 deletions
+10
-1140
launch/dynamo-run/src/lib.rs
launch/dynamo-run/src/lib.rs
+0
-222
launch/dynamo-run/src/main.rs
launch/dynamo-run/src/main.rs
+0
-135
launch/dynamo-run/src/opt.rs
launch/dynamo-run/src/opt.rs
+0
-62
lib/bindings/python/examples/cli/cli.py
lib/bindings/python/examples/cli/cli.py
+1
-1
lib/bindings/python/examples/hello_world/server_sglang.py
lib/bindings/python/examples/hello_world/server_sglang.py
+1
-2
lib/bindings/python/examples/hello_world/server_sglang_tok.py
...bindings/python/examples/hello_world/server_sglang_tok.py
+1
-2
lib/bindings/python/examples/hello_world/server_vllm.py
lib/bindings/python/examples/hello_world/server_vllm.py
+1
-2
lib/engines/mistralrs/Cargo.toml
lib/engines/mistralrs/Cargo.toml
+0
-33
lib/engines/mistralrs/src/lib.rs
lib/engines/mistralrs/src/lib.rs
+0
-646
lib/llm/src/entrypoint/input/text.rs
lib/llm/src/entrypoint/input/text.rs
+1
-1
lib/runtime/src/logging.rs
lib/runtime/src/logging.rs
+0
-1
tests/lmcache/deploy-baseline-dynamo-disag.sh
tests/lmcache/deploy-baseline-dynamo-disag.sh
+1
-7
tests/lmcache/deploy-baseline-dynamo.sh
tests/lmcache/deploy-baseline-dynamo.sh
+1
-7
tests/lmcache/deploy-lmcache_enabled-dynamo-disag.sh
tests/lmcache/deploy-lmcache_enabled-dynamo-disag.sh
+1
-7
tests/lmcache/deploy-lmcache_enabled-dynamo.sh
tests/lmcache/deploy-lmcache_enabled-dynamo.sh
+1
-7
tests/lmcache/run_test.sh
tests/lmcache/run_test.sh
+1
-5
No files found.
launch/dynamo-run/src/lib.rs
deleted
100644 → 0
View file @
2c747d64
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use
anyhow
::
Context
as
_
;
use
dynamo_llm
::
entrypoint
::
EngineConfig
;
use
dynamo_llm
::
entrypoint
::
input
::
Input
;
use
dynamo_llm
::
local_model
::{
LocalModel
,
LocalModelBuilder
};
use
dynamo_runtime
::
distributed
::{
DistributedConfig
,
RequestPlaneMode
};
use
dynamo_runtime
::
storage
::
kv
;
use
dynamo_runtime
::
transports
::
nats
;
use
dynamo_runtime
::{
DistributedRuntime
,
Runtime
};
mod
flags
;
pub
use
flags
::
Flags
;
mod
opt
;
pub
use
dynamo_llm
::
request_template
::
RequestTemplate
;
pub
use
opt
::
Output
;
/// Run `dynamo-run` end to end: resolve the model, describe it with a
/// `LocalModelBuilder`, create the distributed runtime, build an engine for the
/// selected output and drive it from the selected input.
///
/// # Arguments
/// * `runtime` - runtime handle passed on to `DistributedRuntime::new`.
/// * `in_opt` - where requests come from. `Input::Endpoint` also sets the
///   endpoint id on the model builder.
/// * `out_opt` - the engine to use. `None` falls back to `default_engine_for`.
/// * `flags` - parsed command line flags. The TLS cert/key paths are moved out
///   of it, hence `mut`.
///
/// # Errors
/// Propagates failures from fetching the model, parsing the endpoint path, the
/// KV store selector or the request plane mode, creating the distributed
/// runtime, building the local model, `Flags::validate`, engine creation and
/// `run_input`.
pub async fn run(
    runtime: Runtime,
    in_opt: Input,
    out_opt: Option<Output>,
    mut flags: Flags,
) -> anyhow::Result<()> {
    // Both the download step and the builder need to know whether the mocker
    // was explicitly requested, so work it out once.
    let is_mocker = matches!(out_opt, Some(Output::Mocker));

    //
    // Download
    //

    let maybe_remote_repo = flags
        .model_path_pos
        .clone()
        .or_else(|| flags.model_path_flag.clone());

    // Preserve the original model identifier before downloading (for default model name)
    let original_model_identifier = maybe_remote_repo
        .as_ref()
        .map(|p| p.display().to_string());

    let model_path = match maybe_remote_repo {
        None => None,
        Some(p) if p.exists() => {
            // Already a local path
            Some(p)
        }
        Some(p) => {
            // model_path might be an HF repo, not a local path. Resolve it by downloading.
            // Mocker only needs tokenizer, not weights
            Some(LocalModel::fetch(&p.display().to_string(), is_mocker).await?)
        }
    };

    //
    // Configure
    //

    let mut builder = LocalModelBuilder::default();
    builder
        .model_name(flags.model_name.clone().or(original_model_identifier))
        .kv_cache_block_size(flags.kv_cache_block_size)
        // Only set if user provides. Usually loaded from tokenizer_config.json
        .context_length(flags.context_length)
        .http_port(flags.http_port)
        .tls_cert_path(flags.tls_cert_path.take())
        .tls_key_path(flags.tls_key_path.take())
        .router_config(Some(flags.router_config()))
        .migration_limit(flags.migration_limit)
        .request_template(flags.request_template.clone())
        .is_mocker(is_mocker);

    // Only the worker has a model path
    if let Some(model_path) = model_path {
        builder.model_path(model_path);
    }

    // TODO: old, address this later:
    // If `in=dyn` we want the trtllm/sglang/vllm subprocess to listen on that endpoint.
    // If not, then the endpoint isn't exposed so we let LocalModel invent one.
    if let Input::Endpoint(path) = &in_opt {
        builder.endpoint_id(Some(path.parse().with_context(|| path.clone())?));
    }

    let dst_config = if is_process_local(&in_opt, &out_opt) {
        // We are both the frontend and backend, no networking
        DistributedConfig::process_local()
    } else {
        // Normal case
        let selected_store: kv::Selector = flags.store_kv.parse()?;
        let request_plane: RequestPlaneMode = flags.request_plane.parse()?;
        DistributedConfig {
            store_backend: selected_store,
            // We only need NATS here to monitor its metrics, so only if it's our request plane.
            nats_config: if request_plane.is_nats() {
                Some(nats::ClientOptions::default())
            } else {
                None
            },
            request_plane,
        }
    };
    let distributed_runtime = DistributedRuntime::new(runtime.clone(), dst_config).await?;
    let local_model = builder.build().await?;

    //
    // Create an engine
    //

    let out_opt = out_opt.unwrap_or_else(|| default_engine_for(&local_model));
    print_cuda(&out_opt);

    // Now that we know the output we're targeting, check if we expect it to work
    flags.validate(&out_opt)?;

    // Make an engine from the local_model, flags and output.
    // `flags` is not needed after this call, so it is moved rather than cloned.
    let engine_config = engine_for(
        out_opt,
        flags,
        local_model,
        distributed_runtime.clone(),
    )
    .await?;

    // Run it from an input
    dynamo_llm::entrypoint::input::run_input(distributed_runtime, in_opt, engine_config).await?;

    Ok(())
}
/// Returns `true` when the input is an `Input::Endpoint`, whatever path it carries.
pub fn is_in_dynamic(in_opt: &Input) -> bool {
    matches!(in_opt, Input::Endpoint(_))
}
/// Returns `true` only when an output was given and it is `Output::Auto`.
/// A missing output (`None`) is not considered dynamic.
pub fn is_out_dynamic(out_opt: &Option<Output>) -> bool {
    matches!(out_opt, Some(Output::Auto))
}
/// Returns `true` when neither side is dynamic, i.e. the input is not an
/// endpoint and the output is not `Output::Auto`.
fn is_process_local(in_opt: &Input, out_opt: &Option<Output>) -> bool {
    !(is_in_dynamic(in_opt) || is_out_dynamic(out_opt))
}
/// Build the `EngineConfig` that corresponds to `out_opt`.
///
/// Validation is done beforehand in `Flags::validate`; this function assumes
/// the requested combination is going to work.
///
/// `flags` and `drt` are only read when the output is `Output::Mocker`.
///
/// # Errors
/// Propagates a failure to create the mistralrs engine (when that feature is
/// compiled in) or the mocker engine.
async fn engine_for(
    out_opt: Output,
    flags: Flags,
    local_model: LocalModel,
    drt: DistributedRuntime,
) -> anyhow::Result<EngineConfig> {
    let config = match out_opt {
        // Auto-discover backends: no engine is created here.
        Output::Auto => EngineConfig::Dynamic {
            model: Box::new(local_model),
            chat_engine_factory: None,
        },
        Output::Echo => {
            let model = Box::new(local_model);
            let engine = dynamo_llm::engines::make_echo_engine();
            EngineConfig::InProcessText { model, engine }
        }
        #[cfg(feature = "mistralrs")]
        Output::MistralRs => {
            // The engine is built from a borrow of the model, so create it
            // before the model is moved into the box.
            let engine = dynamo_engine_mistralrs::make_engine(&local_model).await?;
            let model = Box::new(local_model);
            EngineConfig::InProcessText { engine, model }
        }
        Output::Mocker => {
            let mocker_args = flags.mocker_config();
            let endpoint_id = local_model.endpoint_id().clone();
            let engine =
                dynamo_llm::mocker::make_mocker_engine(drt, endpoint_id, mocker_args).await?;
            EngineConfig::InProcessTokens {
                engine,
                model: Box::new(local_model),
                is_prefill: false,
            }
        }
    };
    Ok(config)
}
/// If the user will benefit from CUDA or Metal, remind them to build with it.
/// If they have it, celebrate!
// Only mistralrs needs to be built with CUDA.
// The Python engines only need it at runtime.
#[cfg(feature
=
"mistralrs"
)]
fn
print_cuda
(
output
:
&
Output
)
{
// These engines maybe be compiled in, but are they the chosen one?
match
output
{
#[cfg(feature
=
"mistralrs"
)]
Output
::
MistralRs
=>
{}
_
=>
{
return
;
}
}
#[cfg(feature
=
"cuda"
)]
{
tracing
::
info!
(
"CUDA on"
);
}
#[cfg(feature
=
"metal"
)]
{
tracing
::
info!
(
"Metal on"
);
}
#[cfg(not(any(feature
=
"cuda"
,
feature
=
"metal"
)))]
tracing
::
info!
(
"CPU mode. Rebuild with `--features cuda|metal` for better performance"
);
}
#[cfg(not(feature
=
"mistralrs"
))]
fn
print_cuda
(
_
output
:
&
Output
)
{}
fn
default_engine_for
(
_
local_model
:
&
LocalModel
)
->
Output
{
safetensors_default
()
}
/// Default output: `Output::MistralRs` when the `mistralrs` feature is
/// compiled in, otherwise `Output::Echo`.
fn safetensors_default() -> Output {
    #[cfg(feature = "mistralrs")]
    let engine = Output::MistralRs;

    #[cfg(not(feature = "mistralrs"))]
    let engine = Output::Echo;

    engine
}
launch/dynamo-run/src/main.rs
deleted
100644 → 0
View file @
2c747d64
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use
std
::
env
;
use
clap
::{
CommandFactory
as
_
,
Parser
};
use
dynamo_runtime
::
config
::
environment_names
::
logging
as
env_logging
;
use
dynamo_llm
::
entrypoint
::
input
::
Input
;
use
dynamo_run
::
Output
;
use
dynamo_runtime
::
logging
;
const
HELP
:
&
str
=
r#"
dynamo-run is a single binary that wires together the various inputs (http, text, network) and workers (network, engine), that runs the services. It is the simplest way to use dynamo locally.
Verbosity:
- -v enables debug logs
- -vv enables full trace logs
- Default is info level logging
Example:
- cargo build --features cuda -p dynamo-run
- cd target/debug
- ./dynamo-run Qwen/Qwen3-0.6B (OR ./dynamo-run /data/hf-checkouts/Qwen3-0.6B)
See `docs/guides/dynamo_run.md` in the repo for full details.
"#
;
const
USAGE
:
&
str
=
"USAGE: dynamo-run in=[http|grpc|text|dyn://<path>|batch:<folder>] out=ENGINE_LIST|auto|dyn://<path> [--http-port 8080] [--model-path <path>] [--model-name <served-model-name>] [--context-length=N] [--kv-cache-block-size=16] [--extra-engine-args=args.json] [--router-mode random|round-robin|kv] [--kv-overlap-score-weight=2.0] [--router-temperature=0.0] [--use-kv-events] [--max-num-batched-tokens=1.0] [--migration-limit=0] [--verbosity (-v|-vv)]"
;
/// Process entry point: choose the log level from the verbosity flag, start
/// logging, build the runtime worker and run `wrapper` on it.
///
/// # Errors
/// Fails on a verbosity above 2, or when the runtime configuration or the
/// worker cannot be created. Otherwise returns whatever `worker.execute`
/// returns.
fn main() -> anyhow::Result<()> {
    // Translate the verbosity count into a log level. If the flags cannot be
    // parsed at this point, stay on the default level.
    let verbosity = dynamo_run::Flags::try_parse()
        .ok()
        .map(|flags| flags.verbosity);
    let log_level = match verbosity {
        None | Some(0) => "info",
        Some(1) => "debug",
        Some(2) => "trace",
        Some(_) => {
            anyhow::bail!("Invalid verbosity level. Valid values are v (debug) or vv (trace)")
        }
    };

    // Only override the environment when the user asked for more than the default.
    if log_level != "info" {
        // SAFETY: NOTE(review): this runs at the top of `main`, before logging
        // and the runtime worker are created below; presumably no other thread
        // is reading the environment yet. Confirm.
        unsafe { std::env::set_var(env_logging::DYN_LOG, log_level) };
    }
    logging::init();

    // max_worker_threads and max_blocking_threads from env vars or config file.
    let rt_config = dynamo_runtime::RuntimeConfig::from_settings()?;
    tracing::debug!("Runtime config: {rt_config}");

    // One per process. Wraps a Runtime which holds one or two tokio runtimes.
    dynamo_runtime::Worker::from_config(rt_config)?.execute(wrapper)
}
async
fn
wrapper
(
runtime
:
dynamo_runtime
::
Runtime
)
->
anyhow
::
Result
<
()
>
{
let
mut
in_opt
=
None
;
let
mut
out_opt
=
None
;
let
args
:
Vec
<
String
>
=
env
::
args
()
.skip
(
1
)
.collect
();
if
args
.is_empty
()
||
args
[
0
]
==
"-h"
||
args
[
0
]
==
"--help"
||
(
args
.iter
()
.all
(|
arg
|
arg
==
"-v"
||
arg
==
"-vv"
))
{
let
engine_list
=
Output
::
available_engines
()
.join
(
"|"
);
let
usage
=
USAGE
.replace
(
"ENGINE_LIST"
,
&
engine_list
);
println!
(
"{usage}"
);
println!
(
"{HELP}"
);
dynamo_run
::
Flags
::
command
()
.print_long_help
()
.unwrap
();
return
Ok
(());
}
else
if
args
[
0
]
==
"--version"
{
if
let
Some
(
describe
)
=
option_env!
(
"VERGEN_GIT_DESCRIBE"
)
{
println!
(
"dynamo-run {}"
,
describe
);
}
else
{
println!
(
"Version not available (git describe not available)"
);
}
return
Ok
(());
}
for
arg
in
env
::
args
()
.skip
(
1
)
.take
(
2
)
{
let
Some
((
in_out
,
val
))
=
arg
.split_once
(
'='
)
else
{
// Probably we're defaulting in and/or out, and this is a flag
continue
;
};
match
in_out
{
"in"
=>
{
in_opt
=
Some
(
val
.try_into
()
?
);
}
"out"
=>
{
if
val
==
"sglang"
||
val
==
"trtllm"
||
val
==
"vllm"
{
tracing
::
error!
(
"To run the {val} engine please use the Python interface, see root README or look in directory `examples/backends/`."
);
std
::
process
::
exit
(
1
);
}
out_opt
=
Some
(
val
.try_into
()
?
);
}
_
=>
{
anyhow
::
bail!
(
"Invalid argument, must start with 'in' or 'out. {USAGE}"
);
}
}
}
let
mut
non_flag_params
=
1
;
// binary name
let
in_opt
=
match
in_opt
{
Some
(
x
)
=>
{
non_flag_params
+=
1
;
x
}
None
=>
Input
::
default
(),
};
if
out_opt
.is_some
()
{
non_flag_params
+=
1
;
}
// Clap skips the first argument expecting it to be the binary name, so add it back
// Note `--model-path` has index=1 (in lib.rs) so that doesn't need a flag.
let
flags
=
dynamo_run
::
Flags
::
try_parse_from
(
[
"dynamo-run"
.to_string
()]
.into_iter
()
.chain
(
env
::
args
()
.skip
(
non_flag_params
)),
)
?
;
if
dynamo_run
::
is_in_dynamic
(
&
in_opt
)
&&
dynamo_run
::
is_out_dynamic
(
&
out_opt
)
{
anyhow
::
bail!
(
"Cannot use endpoint for both in and out"
);
}
dynamo_run
::
run
(
runtime
,
in_opt
,
out_opt
,
flags
)
.await
}
launch/dynamo-run/src/opt.rs
deleted
100644 → 0
View file @
2c747d64
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use
std
::
fmt
;
/// The engine (output side) that `dynamo-run` drives.
///
/// Parsed from the `out=` argument via `TryFrom<&str>` and rendered back with
/// `Display`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Output {
    /// Echos the prompt back as the response
    Echo,

    /// Listen for models on nats/etcd, add/remove dynamically
    Auto,

    /// The in-process mistralrs engine. Only exists when the `mistralrs`
    /// feature is compiled in.
    #[cfg(feature = "mistralrs")]
    MistralRs,

    /// The mocker engine, selected with `out=mocker`.
    Mocker,
}
impl
TryFrom
<&
str
>
for
Output
{
type
Error
=
anyhow
::
Error
;
fn
try_from
(
s
:
&
str
)
->
anyhow
::
Result
<
Self
>
{
match
s
{
#[cfg(feature
=
"mistralrs"
)]
"mistralrs"
=>
Ok
(
Output
::
MistralRs
),
"mocker"
=>
Ok
(
Output
::
Mocker
),
"echo"
|
"echo_full"
=>
Ok
(
Output
::
Echo
),
"dyn"
|
"auto"
=>
Ok
(
Output
::
Auto
),
e
=>
Err
(
anyhow
::
anyhow!
(
"Invalid out= option '{e}'"
)),
}
}
}
/// Render the engine as its lowercase name: `mistralrs`, `mocker`, `echo` or
/// `auto`.
impl fmt::Display for Output {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(match self {
            #[cfg(feature = "mistralrs")]
            Output::MistralRs => "mistralrs",
            Output::Mocker => "mocker",
            Output::Echo => "echo",
            Output::Auto => "auto",
        })
    }
}
impl Output {
    /// Names of the engines that can be passed as `out=`: always `echo` and
    /// the mocker, plus mistralrs when that feature is compiled in.
    // `mut` is only exercised when the `mistralrs` feature pushes an entry.
    #[allow(unused_mut)]
    pub fn available_engines() -> Vec<String> {
        let mut engines = Vec::from(["echo".to_string(), Output::Mocker.to_string()]);

        #[cfg(feature = "mistralrs")]
        engines.push(Output::MistralRs.to_string());

        engines
    }
}
lib/bindings/python/examples/cli/cli.py
View file @
bbe82f18
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# Example cli using the Python bindings
, similar to `dynamo-run`
.
# Example cli using the Python bindings.
#
# Usage: `python cli.py in=text out=echo <your-model>`.
# `in` can be:
...
...
lib/bindings/python/examples/hello_world/server_sglang.py
View file @
bbe82f18
...
...
@@ -16,8 +16,7 @@
# Start nats and etcd:
# - nats-server -js
#
# Window 1: `python server_sglang.py`. Wait for log "Starting endpoint".
# Window 2: `dynamo-run out=dyn
# `python server_sglang.py`. Wait for log "Starting endpoint".
import
argparse
import
asyncio
...
...
lib/bindings/python/examples/hello_world/server_sglang_tok.py
View file @
bbe82f18
...
...
@@ -17,8 +17,7 @@
# Start nats and etcd:
# - nats-server -js
#
# Window 1: `python server_sglang.py`. Wait for log "Starting endpoint".
# Window 2: `dynamo-run out=dyn
# `python server_sglang.py`. Wait for log "Starting endpoint".
import
argparse
import
asyncio
...
...
lib/bindings/python/examples/hello_world/server_vllm.py
View file @
bbe82f18
...
...
@@ -12,8 +12,7 @@
# Start nats and etcd:
# - nats-server -js
#
# Window 1: `python server_vllm.py`. Wait for log "Starting endpoint".
# Window 2: `dynamo-run out=dyn
# `python server_vllm.py`. Wait for log "Starting endpoint".
import
argparse
import
asyncio
...
...
lib/engines/mistralrs/Cargo.toml
deleted
100644 → 0
View file @
2c747d64
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
[package]
name
=
"dynamo-engine-mistralrs"
version.workspace
=
true
edition.workspace
=
true
description.workspace
=
true
authors.workspace
=
true
license.workspace
=
true
homepage.workspace
=
true
repository.workspace
=
true
keywords.workspace
=
true
[features]
default
=
[]
cuda
=
["mistralrs/cuda"]
metal
=
["mistralrs/metal"]
[dependencies]
dynamo-runtime
=
{
workspace
=
true
}
dynamo-llm
=
{
workspace
=
true
}
anyhow
=
{
workspace
=
true
}
dynamo-async-openai
=
{
workspace
=
true
}
async-stream
=
{
workspace
=
true
}
async-trait
=
{
workspace
=
true
}
either
=
{
workspace
=
true
}
indexmap
=
{
version
=
"2.9.0"
,
features
=
["serde"]
}
mistralrs
=
{
git
=
"https://github.com/EricLBuehler/mistral.rs.git"
,
version
=
"0.6.0"
,
rev
=
"2bcf0e9e3"
}
serde_json
=
{
workspace
=
true
}
tokio
=
{
workspace
=
true
}
tracing
=
{
workspace
=
true
}
lib/engines/mistralrs/src/lib.rs
deleted
100644 → 0
View file @
2c747d64
This diff is collapsed.
Click to expand it.
lib/llm/src/entrypoint/input/text.rs
View file @
bbe82f18
...
...
@@ -50,7 +50,7 @@ async fn main_loop(
}
let
theme
=
dialoguer
::
theme
::
ColorfulTheme
::
default
();
// Initial prompt is
the
pipe
case: `echo "Hello" | dynamo-run ..`
// Initial prompt is
from
pipe
d stdin.
// We run that single prompt and exit
let
single
=
initial_prompt
.is_some
();
let
mut
history
=
dialoguer
::
BasicHistory
::
default
();
...
...
lib/runtime/src/logging.rs
View file @
bbe82f18
...
...
@@ -122,7 +122,6 @@ impl Default for LoggingConfig {
(
"tokenizers"
.to_string
(),
"error"
.to_string
()),
(
"axum"
.to_string
(),
"error"
.to_string
()),
(
"tonic"
.to_string
(),
"error"
.to_string
()),
(
"mistralrs_core"
.to_string
(),
"error"
.to_string
()),
(
"hf_hub"
.to_string
(),
"error"
.to_string
()),
(
"opentelemetry"
.to_string
(),
"error"
.to_string
()),
(
"opentelemetry-otlp"
.to_string
(),
"error"
.to_string
()),
...
...
tests/lmcache/deploy-baseline-dynamo-disag.sh
View file @
bbe82f18
...
...
@@ -24,12 +24,6 @@ echo "🚀 Starting dynamo disaggregated serving setup without LMCache:"
echo
" Model:
$MODEL_URL
"
echo
" Port: 8000"
echo
" Mode: Disaggregated (prefill + decode workers)"
# Kill any existing dynamo processes
echo
"🧹 Cleaning up any existing dynamo processes..."
pkill
-f
"dynamo-run"
||
true
sleep
2
echo
"🔧 Starting dynamo disaggregated serving without LMCache..."
python
-m
dynamo.frontend &
...
...
tests/lmcache/deploy-baseline-dynamo.sh
View file @
bbe82f18
...
...
@@ -22,12 +22,6 @@ fi
echo
"🚀 Starting dynamo setup without LMCache:"
echo
" Model:
$MODEL_URL
"
echo
" Port: 8000"
# Kill any existing dynamo processes
echo
"🧹 Cleaning up any existing dynamo processes..."
pkill
-f
"dynamo-run"
||
true
sleep
2
echo
"🔧 Starting dynamo worker without LMCache..."
python
-m
dynamo.frontend &
...
...
tests/lmcache/deploy-lmcache_enabled-dynamo-disag.sh
View file @
bbe82f18
...
...
@@ -25,12 +25,6 @@ echo " Model: $MODEL_URL"
echo
" Port: 8000"
echo
" Mode: Disaggregated (prefill + decode workers) + LMCache"
echo
" !! Remember to kill the old dynamo processes otherwise the port will be busy !!"
# Kill any existing dynamo processes
echo
"🧹 Cleaning up any existing dynamo processes..."
pkill
-f
"dynamo-run"
||
true
sleep
2
echo
"🔧 Starting dynamo disaggregated serving with LMCache enabled..."
python
-m
dynamo.frontend &
...
...
tests/lmcache/deploy-lmcache_enabled-dynamo.sh
View file @
bbe82f18
...
...
@@ -23,12 +23,6 @@ echo "🚀 Starting dynamo setup with LMCache:"
echo
" Model:
$MODEL_URL
"
echo
" Port: 8000"
echo
" !! Remmber to kill the old dynamo processes other wise the port will be busy !! "
# Kill any existing dynamo processes
echo
"🧹 Cleaning up any existing dynamo processes..."
pkill
-f
"dynamo-run"
||
true
sleep
2
echo
"🔧 Starting dynamo worker with LMCache enabled..."
python
-m
dynamo.frontend &
...
...
tests/lmcache/run_test.sh
View file @
bbe82f18
...
...
@@ -17,10 +17,6 @@ echo ""
cleanup
()
{
echo
"🧹 Cleaning up running processes..."
# Kill any remaining dynamo processes
pkill
-f
"dynamo-run"
||
true
pkill
-f
"components/main.py"
||
true
# Stop docker services
docker compose
-f
../../deploy/docker-compose.yml down 2>/dev/null
||
true
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment