Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
46ed649c
Commit
46ed649c
authored
Mar 05, 2025
by
Graham King
Committed by
GitHub
Mar 05, 2025
Browse files
fix: mistralrs use auto device map (#31)
Fixes a panic.
parent
3ba2b7e9
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
24 additions
and
11 deletions
+24
-11
launch/dynemo-run/README.md
launch/dynemo-run/README.md
+7
-8
lib/llm/Cargo.toml
lib/llm/Cargo.toml
+2
-0
lib/llm/src/engines/mistralrs.rs
lib/llm/src/engines/mistralrs.rs
+15
-3
No files found.
launch/dynemo-run/README.md
View file @
46ed649c
...
@@ -2,16 +2,15 @@
...
@@ -2,16 +2,15 @@
`dynemo-run`
is a tool for exploring the dynemo components.
`dynemo-run`
is a tool for exploring the dynemo components.
##
Quickstart
##
Setup
-
Install Rust
Libraries (Ubuntu):
-
`cargo install --features mistralrs,cuda --git https://github.com/dynemo-ai/dynemo.git dynemo-run`
```
-
`dynemo-run <GGUF or HF-repo-checkout>`
apt install -y build-essential libhwloc-dev libudev-dev pkg-config libssl-dev protobuf-compiler python3-dev
```
## Install and start pre-requisites
Rust:
Install
Rust:
```
bash
```
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
```
```
...
...
lib/llm/Cargo.toml
View file @
46ed649c
...
@@ -128,6 +128,8 @@ llama-cpp-2 = { version = "0.1.86", optional = true }
...
@@ -128,6 +128,8 @@ llama-cpp-2 = { version = "0.1.86", optional = true }
tokenizers
=
{
version
=
"0.21.0"
,
default-features
=
false
,
features
=
[
tokenizers
=
{
version
=
"0.21.0"
,
default-features
=
false
,
features
=
[
"onig"
,
"onig"
,
"esaxx_fast"
,
"esaxx_fast"
,
# Waiting for release: https://github.com/huggingface/tokenizers/issues/1736
# "rustls-tls",
]
}
]
}
sentencepiece
=
{
version
=
"0.11.2"
,
optional
=
true
}
sentencepiece
=
{
version
=
"0.11.2"
,
optional
=
true
}
...
...
lib/llm/src/engines/mistralrs.rs
View file @
46ed649c
...
@@ -21,7 +21,7 @@ use async_trait::async_trait;
...
@@ -21,7 +21,7 @@ use async_trait::async_trait;
use
either
::
Either
;
use
either
::
Either
;
use
indexmap
::
IndexMap
;
use
indexmap
::
IndexMap
;
use
mistralrs
::{
use
mistralrs
::{
Constraint
,
DefaultSchedulerMethod
,
Device
,
DeviceMapMetadata
,
DeviceMapSetting
,
AutoDeviceMapParams
,
Constraint
,
DefaultSchedulerMethod
,
Device
,
DeviceMapSetting
,
GGUFLoaderBuilder
,
GGUFSpecificConfig
,
MemoryGpuConfig
,
MistralRs
,
MistralRsBuilder
,
GGUFLoaderBuilder
,
GGUFSpecificConfig
,
MemoryGpuConfig
,
MistralRs
,
MistralRsBuilder
,
ModelDType
,
NormalLoaderBuilder
,
NormalRequest
,
NormalSpecificConfig
,
PagedAttentionConfig
,
ModelDType
,
NormalLoaderBuilder
,
NormalRequest
,
NormalSpecificConfig
,
PagedAttentionConfig
,
Pipeline
,
Request
,
RequestMessage
,
ResponseOk
,
SamplingParams
,
SchedulerConfig
,
TokenSource
,
Pipeline
,
Request
,
RequestMessage
,
ResponseOk
,
SamplingParams
,
SchedulerConfig
,
TokenSource
,
...
@@ -41,6 +41,15 @@ use crate::types::openai::chat_completions::OpenAIChatCompletionsStreamingEngine
...
@@ -41,6 +41,15 @@ use crate::types::openai::chat_completions::OpenAIChatCompletionsStreamingEngine
/// If user does not provide a max_tokens limit prompt+output to this many
/// If user does not provide a max_tokens limit prompt+output to this many
const
DEFAULT_MAX_TOKENS
:
i32
=
8192
;
const
DEFAULT_MAX_TOKENS
:
i32
=
8192
;
/// TODO: tune. Presumably we read it from model's config.json?
const
MAX_SEQ_LEN
:
usize
=
4096
;
// TODO: tune, maybe implement batching.
const
MAX_BATCH_SIZE
:
usize
=
2
;
/// TODO: tune
const
PAGED_ATTENTION_MAX_NUM_SEQS
:
usize
=
5
;
pub
async
fn
make_engine
(
pub
async
fn
make_engine
(
gguf_path
:
&
Path
,
gguf_path
:
&
Path
,
)
->
pipeline_error
::
Result
<
OpenAIChatCompletionsStreamingEngine
>
{
)
->
pipeline_error
::
Result
<
OpenAIChatCompletionsStreamingEngine
>
{
...
@@ -125,7 +134,10 @@ impl MistralRsEngine {
...
@@ -125,7 +134,10 @@ impl MistralRsEngine {
&
ModelDType
::
Auto
,
&
ModelDType
::
Auto
,
&
best_device
()
?
,
&
best_device
()
?
,
false
,
false
,
DeviceMapSetting
::
Map
(
DeviceMapMetadata
::
dummy
()),
DeviceMapSetting
::
Auto
(
AutoDeviceMapParams
::
Text
{
max_seq_len
:
MAX_SEQ_LEN
,
max_batch_size
:
MAX_BATCH_SIZE
,
}),
None
,
None
,
paged_attention_config
,
paged_attention_config
,
)
?
;
)
?
;
...
@@ -138,7 +150,7 @@ impl MistralRsEngine {
...
@@ -138,7 +150,7 @@ impl MistralRsEngine {
}
}
};
};
SchedulerConfig
::
PagedAttentionMeta
{
SchedulerConfig
::
PagedAttentionMeta
{
max_num_seqs
:
5
,
max_num_seqs
:
PAGED_ATTENTION_MAX_NUM_SEQS
,
config
,
config
,
}
}
}
else
{
}
else
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment