Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
182d3b5d
Unverified
Commit
182d3b5d
authored
Jul 16, 2025
by
Graham King
Committed by
GitHub
Jul 16, 2025
Browse files
chore(bindings): Remove mistralrs / llama.cpp (#1970)
parent
def6eaa9
Changes
6
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
87 additions
and
2101 deletions
+87
-2101
lib/bindings/python/Cargo.lock
lib/bindings/python/Cargo.lock
+79
-2015
lib/bindings/python/Cargo.toml
lib/bindings/python/Cargo.toml
+0
-4
lib/bindings/python/README.md
lib/bindings/python/README.md
+0
-20
lib/bindings/python/examples/cli/cli.py
lib/bindings/python/examples/cli/cli.py
+5
-23
lib/bindings/python/rust/llm/entrypoint.rs
lib/bindings/python/rust/llm/entrypoint.rs
+2
-38
lib/runtime/src/runtime.rs
lib/runtime/src/runtime.rs
+1
-1
No files found.
lib/bindings/python/Cargo.lock
View file @
182d3b5d
This diff is collapsed.
Click to expand it.
lib/bindings/python/Cargo.toml
View file @
182d3b5d
...
...
@@ -36,14 +36,10 @@ crate-type = ["cdylib", "rlib"]
[features]
default
=
[]
block-manager
=
[
"dynamo-llm/block-manager"
,
"dep:dlpark"
]
mistralrs
=
["dep:dynamo-engine-mistralrs"]
llamacpp
=
["dep:dynamo-engine-llamacpp"]
[dependencies]
dynamo-llm
=
{
path
=
"../../llm"
}
dynamo-runtime
=
{
path
=
"../../runtime"
}
dynamo-engine-mistralrs
=
{
path
=
"../../engines/mistralrs"
,
features
=
["cuda"]
,
optional
=
true
}
dynamo-engine-llamacpp
=
{
path
=
"../../engines/llamacpp"
,
features
=
[
"cuda"
,
"dynamic-link"
],
optional
=
true
}
anyhow
=
{
version
=
"1"
}
async-openai
=
{
version
=
"0.29.0"
}
...
...
lib/bindings/python/README.md
View file @
182d3b5d
...
...
@@ -46,26 +46,6 @@ uv pip install maturin
maturin develop --uv
```
5.
Experimental: To allow using mistral.rs and llama.cpp via the bindings, build with feature flags:
```
maturin develop --features mistralrs,llamacpp --release
```
`--release`
is optional. It builds slower but the resulting library is significantly faster.
See
`examples/cli/cli.py`
for usage.
They will both be built for CUDA by default. If you see a runtime error
`CUDA_ERROR_STUB_LIBRARY`
this is because
the stub
`libcuda.so`
is earlier on the library search path than the real libcuda. Try removing the
`rpath`
from the library:
```
patchelf --set-rpath '' _core.cpython-312-x86_64-linux-gnu.so
```
If you include the
`llamacpp`
feature flag,
`libllama.so`
and
`libggml.so`
(and family) will need to be available at runtime.
## Run Examples
### Prerequisite
...
...
lib/bindings/python/examples/cli/cli.py
View file @
182d3b5d
...
...
@@ -3,7 +3,7 @@
# Example cli using the Python bindings, similar to `dynamo-run`.
#
# Usage: `python cli.py in=text out=
mistralrs
<your-model>`.
# Usage: `python cli.py in=text out=
echo
<your-model>`.
# `in` can be:
# - "http": OpenAI compliant HTTP server
# - "text": Interactive text chat
...
...
@@ -13,28 +13,12 @@
#
# `out` can be:
# - "dyn": Run as the frontend node. Auto-discover workers and route traffic to them.
# -
"mistralrs", "llamacpp",
"sglang", "vllm", "trtllm", "echo": An LLM worker.
# - "sglang", "vllm", "trtllm", "echo": An LLM worker.
#
# Must be in a virtualenv with the Dynamo bindings (or wheel) installed.
#
# To use mistralrs or llamacpp you must build the library with those features:
# ```
# maturin develop --features mistralrs,llamacpp --release
# ```
#
# `--release` is optional. It builds slower but the resulting library is significantly faster.
#
# They will both be built for CUDA by default. If you see a runtime error `CUDA_ERROR_STUB_LIBRARY` this is because
# the stub `libcuda.so` is earlier on the library search path than the real libcuda. Try removing
# the `rpath` from the library:
#
# ```
# patchelf --set-rpath '' _core.cpython-312-x86_64-linux-gnu.so
# ```
#
# If you include the `llamacpp` feature flag, `libllama.so` and `libggml.so` (and family) will need to be
# available at runtime.
#
# There is no provided llama.cpp engine here, but there is one in components/llama_cpp/. It would be
# easy enough to copy the few Python lines from there to here and add an `out=llama_cpp`.
import
argparse
import
asyncio
...
...
@@ -79,7 +63,7 @@ def parse_args():
# --- Step 2: Argparse for flags and the model path ---
parser
=
argparse
.
ArgumentParser
(
description
=
"Dynamo example CLI: Connect inputs to an engine"
,
usage
=
"python cli.py in=text out=
mistralrs
<your-model>"
,
usage
=
"python cli.py in=text out=
echo
<your-model>"
,
formatter_class
=
argparse
.
RawTextHelpFormatter
,
# To preserve multi-line help formatting
)
...
...
@@ -186,8 +170,6 @@ async def run():
engine_type_map
=
{
"echo"
:
EngineType
.
Echo
,
"mistralrs"
:
EngineType
.
MistralRs
,
"llamacpp"
:
EngineType
.
LlamaCpp
,
"dyn"
:
EngineType
.
Dynamic
,
}
out_mode
=
args
[
"out_mode"
]
...
...
lib/bindings/python/rust/llm/entrypoint.rs
View file @
182d3b5d
...
...
@@ -17,10 +17,8 @@ use dynamo_runtime::protocols::Endpoint as EndpointId;
#[repr(i32)]
pub
enum
EngineType
{
Echo
=
1
,
MistralRs
=
2
,
LlamaCpp
=
3
,
Dynamic
=
4
,
Mocker
=
5
,
Dynamic
=
2
,
Mocker
=
3
,
}
#[pyclass]
...
...
@@ -157,40 +155,6 @@ async fn select_engine(
model
:
Box
::
new
(
local_model
),
}
}
EngineType
::
MistralRs
=>
{
#[cfg(feature
=
"mistralrs"
)]
{
RsEngineConfig
::
StaticFull
{
engine
:
dynamo_engine_mistralrs
::
make_engine
(
&
local_model
)
.await
?
,
model
:
Box
::
new
(
local_model
),
}
}
#[cfg(not(feature
=
"mistralrs"
))]
{
anyhow
::
bail!
(
"mistralrs engine is not enabled. Rebuild bindings with `--features mistralrs`"
);
}
}
EngineType
::
LlamaCpp
=>
{
#[cfg(feature
=
"llamacpp"
)]
{
RsEngineConfig
::
StaticCore
{
engine
:
dynamo_engine_llamacpp
::
make_engine
(
distributed_runtime
.inner
.primary_token
(),
&
local_model
,
)
.await
?
,
model
:
Box
::
new
(
local_model
),
}
}
#[cfg(not(feature
=
"llamacpp"
))]
{
anyhow
::
bail!
(
"llamacpp engine is not enabled. Rebuild bindings with `--features llamacpp`"
);
}
}
};
Ok
(
inner
)
...
...
lib/runtime/src/runtime.rs
View file @
182d3b5d
...
...
@@ -30,7 +30,7 @@ use crate::config::{self, RuntimeConfig};
use
futures
::
Future
;
use
once_cell
::
sync
::
OnceCell
;
use
std
::
sync
::
{
Arc
,
Mutex
}
;
use
std
::
sync
::
Arc
;
use
tokio
::{
signal
,
task
::
JoinHandle
};
pub
use
tokio_util
::
sync
::
CancellationToken
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment