Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
bce74588
Unverified
Commit
bce74588
authored
Aug 22, 2025
by
Graham King
Committed by
GitHub
Aug 22, 2025
Browse files
chore: Rust to 1.89 and edition 2024 (#2659)
parent
268d017e
Changes
199
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
113 additions
and
118 deletions
+113
-118
lib/async-openai/src/types/assistant_stream.rs
lib/async-openai/src/types/assistant_stream.rs
+1
-1
lib/async-openai/src/types/embedding.rs
lib/async-openai/src/types/embedding.rs
+1
-1
lib/async-openai/src/types/impls.rs
lib/async-openai/src/types/impls.rs
+1
-1
lib/async-openai/src/uploads.rs
lib/async-openai/src/uploads.rs
+1
-1
lib/async-openai/src/users.rs
lib/async-openai/src/users.rs
+1
-1
lib/async-openai/src/util.rs
lib/async-openai/src/util.rs
+1
-1
lib/async-openai/src/vector_store_file_batches.rs
lib/async-openai/src/vector_store_file_batches.rs
+1
-1
lib/async-openai/src/vector_store_files.rs
lib/async-openai/src/vector_store_files.rs
+1
-1
lib/async-openai/src/vector_stores.rs
lib/async-openai/src/vector_stores.rs
+1
-1
lib/async-openai/tests/bring-your-own-type.rs
lib/async-openai/tests/bring-your-own-type.rs
+2
-2
lib/async-openai/tests/whisper.rs
lib/async-openai/tests/whisper.rs
+1
-1
lib/bindings/c/src/lib.rs
lib/bindings/c/src/lib.rs
+6
-10
lib/engines/llamacpp/src/lib.rs
lib/engines/llamacpp/src/lib.rs
+4
-4
lib/engines/mistralrs/src/lib.rs
lib/engines/mistralrs/src/lib.rs
+11
-12
lib/llm/benches/tokenizer.rs
lib/llm/benches/tokenizer.rs
+2
-2
lib/llm/src/backend.rs
lib/llm/src/backend.rs
+48
-50
lib/llm/src/block_manager.rs
lib/llm/src/block_manager.rs
+4
-4
lib/llm/src/block_manager/block.rs
lib/llm/src/block_manager/block.rs
+10
-12
lib/llm/src/block_manager/block/data.rs
lib/llm/src/block_manager/block/data.rs
+12
-8
lib/llm/src/block_manager/block/data/local.rs
lib/llm/src/block_manager/block/data/local.rs
+4
-4
No files found.
lib/async-openai/src/types/assistant_stream.rs
View file @
bce74588
...
@@ -13,7 +13,7 @@ use std::pin::Pin;
...
@@ -13,7 +13,7 @@ use std::pin::Pin;
use
futures
::
Stream
;
use
futures
::
Stream
;
use
serde
::
Deserialize
;
use
serde
::
Deserialize
;
use
crate
::
error
::{
map_deserialization_error
,
ApiError
,
OpenAIE
rror
};
use
crate
::
error
::{
ApiError
,
OpenAIError
,
map_deserialization_e
rror
};
use
super
::{
use
super
::{
MessageDeltaObject
,
MessageObject
,
RunObject
,
RunStepDeltaObject
,
RunStepObject
,
ThreadObject
,
MessageDeltaObject
,
MessageObject
,
RunObject
,
RunStepDeltaObject
,
RunStepObject
,
ThreadObject
,
...
...
lib/async-openai/src/types/embedding.rs
View file @
bce74588
...
@@ -8,7 +8,7 @@
...
@@ -8,7 +8,7 @@
// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
// Licensed under Apache 2.0
// Licensed under Apache 2.0
use
base64
::
engine
::{
general_purpose
,
Engine
};
use
base64
::
engine
::{
Engine
,
general_purpose
};
use
derive_builder
::
Builder
;
use
derive_builder
::
Builder
;
use
serde
::{
Deserialize
,
Serialize
};
use
serde
::{
Deserialize
,
Serialize
};
...
...
lib/async-openai/src/types/impls.rs
View file @
bce74588
...
@@ -24,7 +24,6 @@ use crate::{
...
@@ -24,7 +24,6 @@ use crate::{
use
bytes
::
Bytes
;
use
bytes
::
Bytes
;
use
super
::{
use
super
::{
responses
::{
CodeInterpreterContainer
,
Input
,
InputContent
,
Role
as
ResponsesRole
},
AddUploadPartRequest
,
AudioInput
,
AudioResponseFormat
,
ChatCompletionFunctionCall
,
AddUploadPartRequest
,
AudioInput
,
AudioResponseFormat
,
ChatCompletionFunctionCall
,
ChatCompletionFunctions
,
ChatCompletionNamedToolChoice
,
ChatCompletionRequestAssistantMessage
,
ChatCompletionFunctions
,
ChatCompletionNamedToolChoice
,
ChatCompletionRequestAssistantMessage
,
ChatCompletionRequestAssistantMessageContent
,
ChatCompletionRequestDeveloperMessage
,
ChatCompletionRequestAssistantMessageContent
,
ChatCompletionRequestDeveloperMessage
,
...
@@ -40,6 +39,7 @@ use super::{
...
@@ -40,6 +39,7 @@ use super::{
EmbeddingInput
,
FileInput
,
FilePurpose
,
FunctionName
,
Image
,
ImageInput
,
ImageModel
,
EmbeddingInput
,
FileInput
,
FilePurpose
,
FunctionName
,
Image
,
ImageInput
,
ImageModel
,
ImageResponseFormat
,
ImageSize
,
ImageUrl
,
ImagesResponse
,
ModerationInput
,
Prompt
,
Role
,
Stop
,
ImageResponseFormat
,
ImageSize
,
ImageUrl
,
ImagesResponse
,
ModerationInput
,
Prompt
,
Role
,
Stop
,
TimestampGranularity
,
TimestampGranularity
,
responses
::{
CodeInterpreterContainer
,
Input
,
InputContent
,
Role
as
ResponsesRole
},
};
};
/// for `impl_from!(T, Enum)`, implements
/// for `impl_from!(T, Enum)`, implements
...
...
lib/async-openai/src/uploads.rs
View file @
bce74588
...
@@ -9,10 +9,10 @@
...
@@ -9,10 +9,10 @@
// Licensed under Apache 2.0
// Licensed under Apache 2.0
use
crate
::{
use
crate
::{
Client
,
config
::
Config
,
config
::
Config
,
error
::
OpenAIError
,
error
::
OpenAIError
,
types
::{
AddUploadPartRequest
,
CompleteUploadRequest
,
CreateUploadRequest
,
Upload
,
UploadPart
},
types
::{
AddUploadPartRequest
,
CompleteUploadRequest
,
CreateUploadRequest
,
Upload
,
UploadPart
},
Client
,
};
};
/// Allows you to upload large files in multiple parts.
/// Allows you to upload large files in multiple parts.
...
...
lib/async-openai/src/users.rs
View file @
bce74588
...
@@ -11,10 +11,10 @@
...
@@ -11,10 +11,10 @@
use
serde
::
Serialize
;
use
serde
::
Serialize
;
use
crate
::{
use
crate
::{
Client
,
config
::
Config
,
config
::
Config
,
error
::
OpenAIError
,
error
::
OpenAIError
,
types
::{
User
,
UserDeleteResponse
,
UserListResponse
,
UserRoleUpdateRequest
},
types
::{
User
,
UserDeleteResponse
,
UserListResponse
,
UserRoleUpdateRequest
},
Client
,
};
};
/// Manage users and their role in an organization. Users will be automatically added to the Default project.
/// Manage users and their role in an organization. Users will be automatically added to the Default project.
...
...
lib/async-openai/src/util.rs
View file @
bce74588
...
@@ -29,7 +29,7 @@ pub(crate) async fn file_stream_body(source: InputSource) -> Result<Body, OpenAI
...
@@ -29,7 +29,7 @@ pub(crate) async fn file_stream_body(source: InputSource) -> Result<Body, OpenAI
_
=>
{
_
=>
{
return
Err
(
OpenAIError
::
FileReadError
(
return
Err
(
OpenAIError
::
FileReadError
(
"Cannot create stream from non-file source"
.to_string
(),
"Cannot create stream from non-file source"
.to_string
(),
))
))
;
}
}
};
};
Ok
(
body
)
Ok
(
body
)
...
...
lib/async-openai/src/vector_store_file_batches.rs
View file @
bce74588
...
@@ -11,12 +11,12 @@
...
@@ -11,12 +11,12 @@
use
serde
::
Serialize
;
use
serde
::
Serialize
;
use
crate
::{
use
crate
::{
Client
,
config
::
Config
,
config
::
Config
,
error
::
OpenAIError
,
error
::
OpenAIError
,
types
::{
types
::{
CreateVectorStoreFileBatchRequest
,
ListVectorStoreFilesResponse
,
VectorStoreFileBatchObject
,
CreateVectorStoreFileBatchRequest
,
ListVectorStoreFilesResponse
,
VectorStoreFileBatchObject
,
},
},
Client
,
};
};
/// Vector store file batches represent operations to add multiple files to a vector store.
/// Vector store file batches represent operations to add multiple files to a vector store.
...
...
lib/async-openai/src/vector_store_files.rs
View file @
bce74588
...
@@ -11,13 +11,13 @@
...
@@ -11,13 +11,13 @@
use
serde
::
Serialize
;
use
serde
::
Serialize
;
use
crate
::{
use
crate
::{
Client
,
config
::
Config
,
config
::
Config
,
error
::
OpenAIError
,
error
::
OpenAIError
,
types
::{
types
::{
CreateVectorStoreFileRequest
,
DeleteVectorStoreFileResponse
,
ListVectorStoreFilesResponse
,
CreateVectorStoreFileRequest
,
DeleteVectorStoreFileResponse
,
ListVectorStoreFilesResponse
,
VectorStoreFileContentResponse
,
VectorStoreFileObject
,
VectorStoreFileContentResponse
,
VectorStoreFileObject
,
},
},
Client
,
};
};
/// Vector store files represent files inside a vector store.
/// Vector store files represent files inside a vector store.
...
...
lib/async-openai/src/vector_stores.rs
View file @
bce74588
...
@@ -11,6 +11,7 @@
...
@@ -11,6 +11,7 @@
use
serde
::
Serialize
;
use
serde
::
Serialize
;
use
crate
::{
use
crate
::{
Client
,
VectorStoreFiles
,
config
::
Config
,
config
::
Config
,
error
::
OpenAIError
,
error
::
OpenAIError
,
types
::{
types
::{
...
@@ -19,7 +20,6 @@ use crate::{
...
@@ -19,7 +20,6 @@ use crate::{
VectorStoreSearchResultsPage
,
VectorStoreSearchResultsPage
,
},
},
vector_store_file_batches
::
VectorStoreFileBatches
,
vector_store_file_batches
::
VectorStoreFileBatches
,
Client
,
VectorStoreFiles
,
};
};
pub
struct
VectorStores
<
'c
,
C
:
Config
>
{
pub
struct
VectorStores
<
'c
,
C
:
Config
>
{
...
...
lib/async-openai/tests/bring-your-own-type.rs
View file @
bce74588
...
@@ -12,9 +12,9 @@
...
@@ -12,9 +12,9 @@
//! The purpose of this test to make sure that all _byot methods compiles with custom types.
//! The purpose of this test to make sure that all _byot methods compiles with custom types.
use
std
::
pin
::
Pin
;
use
std
::
pin
::
Pin
;
use
dynamo_async_openai
::{
error
::
OpenAIError
,
Client
};
use
dynamo_async_openai
::{
Client
,
error
::
OpenAIError
};
use
futures
::
Stream
;
use
futures
::
Stream
;
use
serde_json
::{
json
,
Value
};
use
serde_json
::{
Value
,
json
};
impl
dynamo_async_openai
::
traits
::
AsyncTryFrom
<
MyJson
>
for
reqwest
::
multipart
::
Form
{
impl
dynamo_async_openai
::
traits
::
AsyncTryFrom
<
MyJson
>
for
reqwest
::
multipart
::
Form
{
type
Error
=
OpenAIError
;
type
Error
=
OpenAIError
;
...
...
lib/async-openai/tests/whisper.rs
View file @
bce74588
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
// Licensed under Apache 2.0
// Licensed under Apache 2.0
use
dynamo_async_openai
::
types
::
CreateTranslationRequestArgs
;
use
dynamo_async_openai
::
types
::
CreateTranslationRequestArgs
;
use
dynamo_async_openai
::{
types
::
CreateTranscriptionRequestArgs
,
Client
};
use
dynamo_async_openai
::{
Client
,
types
::
CreateTranscriptionRequestArgs
};
use
tokio_test
::
assert_err
;
use
tokio_test
::
assert_err
;
#[tokio::test]
#[tokio::test]
...
...
lib/bindings/c/src/lib.rs
View file @
bce74588
...
@@ -48,7 +48,7 @@ pub enum DynamoLlmResult {
...
@@ -48,7 +48,7 @@ pub enum DynamoLlmResult {
/// # Safety
/// # Safety
/// the namespace_c_str and component_c_str are passed as pointers to C strings
/// the namespace_c_str and component_c_str are passed as pointers to C strings
#[no_mangle]
#[
unsafe(
no_mangle
)
]
pub
unsafe
extern
"C"
fn
dynamo_llm_init
(
pub
unsafe
extern
"C"
fn
dynamo_llm_init
(
namespace_c_str
:
*
const
c_char
,
namespace_c_str
:
*
const
c_char
,
component_c_str
:
*
const
c_char
,
component_c_str
:
*
const
c_char
,
...
@@ -108,7 +108,7 @@ pub unsafe extern "C" fn dynamo_llm_init(
...
@@ -108,7 +108,7 @@ pub unsafe extern "C" fn dynamo_llm_init(
}
}
}
}
#[no_mangle]
#[
unsafe(
no_mangle
)
]
pub
extern
"C"
fn
dynamo_llm_shutdown
()
->
DynamoLlmResult
{
pub
extern
"C"
fn
dynamo_llm_shutdown
()
->
DynamoLlmResult
{
let
wk
=
match
WK
.get
()
{
let
wk
=
match
WK
.get
()
{
Some
(
wk
)
=>
wk
,
Some
(
wk
)
=>
wk
,
...
@@ -123,7 +123,7 @@ pub extern "C" fn dynamo_llm_shutdown() -> DynamoLlmResult {
...
@@ -123,7 +123,7 @@ pub extern "C" fn dynamo_llm_shutdown() -> DynamoLlmResult {
DynamoLlmResult
::
OK
DynamoLlmResult
::
OK
}
}
#[no_mangle]
#[
unsafe(
no_mangle
)
]
pub
extern
"C"
fn
dynamo_llm_load_publisher_create
()
->
DynamoLlmResult
{
pub
extern
"C"
fn
dynamo_llm_load_publisher_create
()
->
DynamoLlmResult
{
DynamoLlmResult
::
OK
DynamoLlmResult
::
OK
}
}
...
@@ -191,11 +191,7 @@ fn kv_event_create_stored_from_parts(
...
@@ -191,11 +191,7 @@ fn kv_event_create_stored_from_parts(
if
num_toks
!=
(
kv_block_size
as
usize
)
{
if
num_toks
!=
(
kv_block_size
as
usize
)
{
if
WARN_COUNT
if
WARN_COUNT
.fetch_update
(
Ordering
::
SeqCst
,
Ordering
::
SeqCst
,
|
c
|
{
.fetch_update
(
Ordering
::
SeqCst
,
Ordering
::
SeqCst
,
|
c
|
{
if
c
<
3
{
if
c
<
3
{
Some
(
c
+
1
)
}
else
{
None
}
Some
(
c
+
1
)
}
else
{
None
}
})
})
.is_ok
()
.is_ok
()
{
{
...
@@ -256,7 +252,7 @@ pub struct DynamoKvStoredEventParams {
...
@@ -256,7 +252,7 @@ pub struct DynamoKvStoredEventParams {
/// # Safety
/// # Safety
/// parent_hash is passed as pointer to indicate whether the blocks
/// parent_hash is passed as pointer to indicate whether the blocks
/// has a parent hash or not. nullptr is used to represent no parent hash
/// has a parent hash or not. nullptr is used to represent no parent hash
#[no_mangle]
#[
unsafe(
no_mangle
)
]
pub
unsafe
extern
"C"
fn
dynamo_kv_event_publish_stored
(
pub
unsafe
extern
"C"
fn
dynamo_kv_event_publish_stored
(
event_id
:
u64
,
event_id
:
u64
,
token_ids
:
*
const
u32
,
token_ids
:
*
const
u32
,
...
@@ -293,7 +289,7 @@ pub unsafe extern "C" fn dynamo_kv_event_publish_stored(
...
@@ -293,7 +289,7 @@ pub unsafe extern "C" fn dynamo_kv_event_publish_stored(
}
}
}
}
#[no_mangle]
#[
unsafe(
no_mangle
)
]
pub
extern
"C"
fn
dynamo_kv_event_publish_removed
(
pub
extern
"C"
fn
dynamo_kv_event_publish_removed
(
event_id
:
u64
,
event_id
:
u64
,
block_ids
:
*
const
u64
,
block_ids
:
*
const
u64
,
...
...
lib/engines/llamacpp/src/lib.rs
View file @
bce74588
...
@@ -10,17 +10,17 @@ use std::{
...
@@ -10,17 +10,17 @@ use std::{
use
async_stream
::
stream
;
use
async_stream
::
stream
;
use
dynamo_runtime
::
engine
::{
AsyncEngine
,
AsyncEngineContextProvider
,
ResponseStream
};
use
dynamo_runtime
::
engine
::{
AsyncEngine
,
AsyncEngineContextProvider
,
ResponseStream
};
use
dynamo_runtime
::
pipeline
::
error
as
pipeline_error
;
use
dynamo_runtime
::
pipeline
::
error
as
pipeline_error
;
use
dynamo_runtime
::
pipeline
::{
async_trait
,
Error
,
ManyOut
,
SingleIn
};
use
dynamo_runtime
::
pipeline
::{
Error
,
ManyOut
,
SingleIn
,
async_trait
};
use
dynamo_runtime
::
protocols
::
annotated
::
Annotated
;
use
dynamo_runtime
::
protocols
::
annotated
::
Annotated
;
use
dynamo_runtime
::{
CancellationToken
,
ErrorContext
,
Result
};
use
dynamo_runtime
::{
CancellationToken
,
ErrorContext
,
Result
};
use
llama_cpp_2
::{
use
llama_cpp_2
::{
context
::{
params
::
LlamaContextParams
,
LlamaContext
},
LogOptions
,
context
::{
LlamaContext
,
params
::
LlamaContextParams
},
llama_backend
::
LlamaBackend
,
llama_backend
::
LlamaBackend
,
llama_batch
::
LlamaBatch
,
llama_batch
::
LlamaBatch
,
model
::{
params
::
LlamaModelParams
,
LlamaModel
},
model
::{
LlamaModel
,
params
::
LlamaModelParams
},
sampling
::
LlamaSampler
,
sampling
::
LlamaSampler
,
token
::
LlamaToken
,
token
::
LlamaToken
,
LogOptions
,
};
};
use
dynamo_llm
::
protocols
::
common
::
llm_backend
::
LLMEngineOutput
;
use
dynamo_llm
::
protocols
::
common
::
llm_backend
::
LLMEngineOutput
;
...
...
lib/engines/mistralrs/src/lib.rs
View file @
bce74588
...
@@ -25,7 +25,7 @@ use dynamo_runtime::protocols::annotated::Annotated;
...
@@ -25,7 +25,7 @@ use dynamo_runtime::protocols::annotated::Annotated;
use
dynamo_llm
::
protocols
::
openai
::{
use
dynamo_llm
::
protocols
::
openai
::{
chat_completions
::{
NvCreateChatCompletionRequest
,
NvCreateChatCompletionStreamResponse
},
chat_completions
::{
NvCreateChatCompletionRequest
,
NvCreateChatCompletionStreamResponse
},
completions
::{
prompt_to_string
,
NvCreateCompletionRequest
,
NvCreateCompletionResponse
},
completions
::{
NvCreateCompletionRequest
,
NvCreateCompletionResponse
,
prompt_to_string
},
embeddings
::{
NvCreateEmbeddingRequest
,
NvCreateEmbeddingResponse
},
embeddings
::{
NvCreateEmbeddingRequest
,
NvCreateEmbeddingResponse
},
};
};
...
@@ -240,9 +240,10 @@ impl MistralRsEngine {
...
@@ -240,9 +240,10 @@ impl MistralRsEngine {
}));
}));
// Send warmup request and consume response
// Send warmup request and consume response
if
let
Ok
(
sender
)
=
engine
.mistralrs
.get_sender
(
None
)
{
if
let
Ok
(
sender
)
=
engine
.mistralrs
.get_sender
(
None
)
if
let
Ok
(())
=
sender
.send
(
warmup_request
)
.await
{
&&
let
Ok
(())
=
sender
.send
(
warmup_request
)
.await
if
let
Some
(
response
)
=
rx
.recv
()
.await
{
&&
let
Some
(
response
)
=
rx
.recv
()
.await
{
match
response
.as_result
()
{
match
response
.as_result
()
{
Ok
(
r
)
=>
{
Ok
(
r
)
=>
{
tracing
::
debug!
(
request_id
,
"Warmup response: {r:?}"
);
tracing
::
debug!
(
request_id
,
"Warmup response: {r:?}"
);
...
@@ -252,8 +253,6 @@ impl MistralRsEngine {
...
@@ -252,8 +253,6 @@ impl MistralRsEngine {
}
}
}
}
}
}
}
}
Ok
(
engine
)
Ok
(
engine
)
}
}
...
...
lib/llm/benches/tokenizer.rs
View file @
bce74588
...
@@ -4,13 +4,13 @@
...
@@ -4,13 +4,13 @@
use
std
::
hint
::
black_box
;
use
std
::
hint
::
black_box
;
use
std
::
sync
::
Arc
;
use
std
::
sync
::
Arc
;
use
criterion
::{
criterion_group
,
criterion_main
,
Criterion
,
Throughput
};
use
criterion
::{
Criterion
,
Throughput
,
criterion_group
,
criterion_main
};
use
dynamo_llm
::
backend
::
Decoder
;
use
dynamo_llm
::
backend
::
Decoder
;
use
dynamo_llm
::
protocols
::
common
::
StopConditions
;
use
dynamo_llm
::
protocols
::
common
::
StopConditions
;
use
dynamo_llm
::
tokenizers
::
DecodeStream
;
use
dynamo_llm
::
tokenizers
::
hf
::
HuggingFaceTokenizer
;
use
dynamo_llm
::
tokenizers
::
hf
::
HuggingFaceTokenizer
;
use
dynamo_llm
::
tokenizers
::
traits
::{
Encoder
,
Tokenizer
};
use
dynamo_llm
::
tokenizers
::
traits
::{
Encoder
,
Tokenizer
};
use
dynamo_llm
::
tokenizers
::
DecodeStream
;
use
dynamo_llm
::
types
::
TokenIdType
;
use
dynamo_llm
::
types
::
TokenIdType
;
const
TEST_TOKENIZER
:
&
str
=
concat!
(
const
TEST_TOKENIZER
:
&
str
=
concat!
(
...
...
lib/llm/src/backend.rs
View file @
bce74588
...
@@ -24,22 +24,22 @@ use tracing as log;
...
@@ -24,22 +24,22 @@ use tracing as log;
use
crate
::
model_card
::{
ModelDeploymentCard
,
TokenizerKind
};
use
crate
::
model_card
::{
ModelDeploymentCard
,
TokenizerKind
};
use
dynamo_runtime
::{
use
dynamo_runtime
::{
pipeline
::{
pipeline
::{
async_trait
,
AsyncEngineContextProvider
,
ManyOut
,
Operator
,
ResponseStream
,
AsyncEngineContextProvider
,
ManyOut
,
Operator
,
ResponseStream
,
ServerStreamingEngine
,
S
erverStreamingEngine
,
SingleIn
,
S
ingleIn
,
async_trait
,
},
},
protocols
::
annotated
::
Annotated
,
protocols
::
annotated
::
Annotated
,
};
};
use
crate
::
protocols
::{
use
crate
::
protocols
::{
TokenIdType
,
common
::{
common
::{
StopConditions
,
llm_backend
::{
llm_backend
::{
BackendOutput
,
EmbeddingsEngineOutput
,
FinishReason
,
LLMEngineOutput
,
BackendOutput
,
EmbeddingsEngineOutput
,
FinishReason
,
LLMEngineOutput
,
PreprocessedRequest
,
PreprocessedRequest
,
},
},
preprocessor
::
PreprocessedEmbeddingRequest
,
preprocessor
::
PreprocessedEmbeddingRequest
,
StopConditions
,
},
},
TokenIdType
,
};
};
use
crate
::
tokenizers
::{
DecodeStream
,
HuggingFaceTokenizer
,
Tokenizer
};
use
crate
::
tokenizers
::{
DecodeStream
,
HuggingFaceTokenizer
,
Tokenizer
};
use
tokenizers
::
Tokenizer
as
HfTokenizer
;
use
tokenizers
::
Tokenizer
as
HfTokenizer
;
...
@@ -149,11 +149,12 @@ impl
...
@@ -149,11 +149,12 @@ impl
}
}
// if we have a data field without an event, then we might need to update the data
// if we have a data field without an event, then we might need to update the data
if
let
Some
(
data
)
=
&
output
.data
{
if
let
Some
(
data
)
=
&
output
.data
if
data
.text
.is_some
()
&&
!
state
.validate_engine_decode
{
&&
data
.text
.is_some
()
&&
!
state
.validate_engine_decode
{
return
Some
((
output
,
state
));
return
Some
((
output
,
state
));
}
}
}
let
data
=
output
.data
.as_ref
()
.unwrap
();
let
data
=
output
.data
.as_ref
()
.unwrap
();
...
@@ -425,8 +426,9 @@ impl Decoder {
...
@@ -425,8 +426,9 @@ impl Decoder {
// check stop sequences - the jail will always hold at least the largest stop sequence
// check stop sequences - the jail will always hold at least the largest stop sequence
// if jail_max_bytes is 0, then there are no stop sequences
// if jail_max_bytes is 0, then there are no stop sequences
if
self
.jail_max_bytes
>
0
{
if
self
.jail_max_bytes
>
0
if
let
Some
(
token
)
=
&
token
{
&&
let
Some
(
token
)
=
&
token
{
let
pre_append
=
self
.jail
.len
();
let
pre_append
=
self
.jail
.len
();
log
::
debug!
(
"pre_append: {}"
,
pre_append
);
log
::
debug!
(
"pre_append: {}"
,
pre_append
);
log
::
debug!
(
"jail: {}"
,
self
.jail
);
log
::
debug!
(
"jail: {}"
,
self
.jail
);
...
@@ -436,8 +438,7 @@ impl Decoder {
...
@@ -436,8 +438,7 @@ impl Decoder {
for
seq
in
&
self
.hidden_stop_sequences
{
for
seq
in
&
self
.hidden_stop_sequences
{
log
::
debug!
(
"stop seq: {}"
,
seq
);
log
::
debug!
(
"stop seq: {}"
,
seq
);
if
let
Some
(
offset
)
=
if
let
Some
(
offset
)
=
galil_seiferas
::
gs_find
(
self
.jail
.as_bytes
(),
seq
.as_bytes
())
galil_seiferas
::
gs_find
(
self
.jail
.as_bytes
(),
seq
.as_bytes
())
{
{
log
::
debug!
(
"offset: {}"
,
offset
);
log
::
debug!
(
"offset: {}"
,
offset
);
// return only new bytes after pre_append .. offset+seq.len()
// return only new bytes after pre_append .. offset+seq.len()
...
@@ -465,7 +466,6 @@ impl Decoder {
...
@@ -465,7 +466,6 @@ impl Decoder {
self
.jail
.drain
(
0
..
drain_len
);
self
.jail
.drain
(
0
..
drain_len
);
}
}
}
}
}
Ok
(
StepResult
::
ok
(
token
))
Ok
(
StepResult
::
ok
(
token
))
}
}
...
@@ -485,12 +485,10 @@ impl Decoder {
...
@@ -485,12 +485,10 @@ impl Decoder {
.map
(|
x
|
x
.should_hide_text
())
.map
(|
x
|
x
.should_hide_text
())
.unwrap_or
(
false
);
.unwrap_or
(
false
);
if
!
hide_text
{
if
!
hide_text
&&
let
Some
(
token
)
=
&
token
{
if
let
Some
(
token
)
=
&
token
{
text
.get_or_insert_with
(||
String
::
with_capacity
(
token_ids
.len
()))
text
.get_or_insert_with
(||
String
::
with_capacity
(
token_ids
.len
()))
.push_str
(
token
);
.push_str
(
token
);
}
}
}
tokens
.push
(
token
);
tokens
.push
(
token
);
if
let
Some
(
stop_trigger
)
=
stop_trigger
{
if
let
Some
(
stop_trigger
)
=
stop_trigger
{
...
...
lib/llm/src/block_manager.rs
View file @
bce74588
...
@@ -38,18 +38,18 @@ pub mod controller;
...
@@ -38,18 +38,18 @@ pub mod controller;
pub
use
crate
::
common
::
dtype
::
DType
;
pub
use
crate
::
common
::
dtype
::
DType
;
pub
use
block
::{
pub
use
block
::{
BasicMetadata
,
BlockMetadata
,
Blocks
,
ImmutableBlock
,
MutableBlock
,
locality
::{
self
,
LocalityProvider
,
LogicalResources
},
locality
::{
self
,
LocalityProvider
,
LogicalResources
},
nixl
::{
BlockDescriptorList
,
IsImmutable
,
IsMutable
,
MutabilityKind
,
RemoteBlock
},
nixl
::{
BlockDescriptorList
,
IsImmutable
,
IsMutable
,
MutabilityKind
,
RemoteBlock
},
BasicMetadata
,
BlockMetadata
,
Blocks
,
ImmutableBlock
,
MutableBlock
,
};
};
pub
use
config
::
*
;
pub
use
config
::
*
;
pub
use
layout
::{
nixl
::
NixlLayout
,
LayoutConfig
,
LayoutConfigBuilder
,
LayoutError
,
LayoutType
};
pub
use
layout
::{
LayoutConfig
,
LayoutConfigBuilder
,
LayoutError
,
LayoutType
,
nixl
::
NixlLayout
};
pub
use
offload
::
request
::
BlockResult
;
pub
use
offload
::
request
::
BlockResult
;
pub
use
pool
::{
BlockPool
,
ManagedBlockPool
};
pub
use
pool
::{
BlockPool
,
ManagedBlockPool
};
pub
use
storage
::{
pub
use
storage
::{
nixl
::
NixlRegisterableStorage
,
DeviceStorage
,
DiskStorage
,
PinnedStorage
,
Storage
,
DeviceStorage
,
DiskStorage
,
PinnedStorage
,
Storage
,
StorageAllocator
,
StorageAllocator
,
nixl
::
NixlRegisterableStorage
,
};
};
pub
use
tokio_util
::
sync
::
CancellationToken
;
pub
use
tokio_util
::
sync
::
CancellationToken
;
...
...
lib/llm/src/block_manager/block.rs
View file @
bce74588
...
@@ -21,7 +21,7 @@ pub mod registry;
...
@@ -21,7 +21,7 @@ pub mod registry;
pub
mod
state
;
pub
mod
state
;
pub
mod
transfer
;
pub
mod
transfer
;
pub
use
data
::{
view
,
BlockData
,
BlockDataExt
,
BlockDataProvider
,
BlockDataProviderMut
};
pub
use
data
::{
BlockData
,
BlockDataExt
,
BlockDataProvider
,
BlockDataProviderMut
,
view
};
pub
use
locality
::
LocalityProvider
;
pub
use
locality
::
LocalityProvider
;
pub
use
crate
::
tokens
::
TokenBlockError
;
pub
use
crate
::
tokens
::
TokenBlockError
;
...
@@ -37,10 +37,10 @@ use crate::block_manager::{
...
@@ -37,10 +37,10 @@ use crate::block_manager::{
use
crate
::
tokens
::{
SaltHash
,
SequenceHash
,
Token
,
TokenBlock
,
Tokens
};
use
crate
::
tokens
::{
SaltHash
,
SequenceHash
,
Token
,
TokenBlock
,
Tokens
};
use
super
::{
use
super
::{
WorkerID
,
events
::
PublishHandle
,
events
::
PublishHandle
,
layout
::{
BlockLayout
,
LayoutError
,
LayoutType
},
layout
::{
BlockLayout
,
LayoutError
,
LayoutType
},
storage
::
StorageType
,
storage
::
StorageType
,
WorkerID
,
};
};
use
derive_getters
::
Getters
;
use
derive_getters
::
Getters
;
...
@@ -657,12 +657,12 @@ impl<S: Storage, L: LocalityProvider, M: BlockMetadata> std::fmt::Debug for Muta
...
@@ -657,12 +657,12 @@ impl<S: Storage, L: LocalityProvider, M: BlockMetadata> std::fmt::Debug for Muta
impl
<
S
:
Storage
,
L
:
LocalityProvider
,
M
:
BlockMetadata
>
Drop
for
MutableBlock
<
S
,
L
,
M
>
{
impl
<
S
:
Storage
,
L
:
LocalityProvider
,
M
:
BlockMetadata
>
Drop
for
MutableBlock
<
S
,
L
,
M
>
{
fn
drop
(
&
mut
self
)
{
fn
drop
(
&
mut
self
)
{
tracing
::
debug!
(
"drop: {:?}"
,
self
);
tracing
::
debug!
(
"drop: {:?}"
,
self
);
if
let
Some
(
block
)
=
self
.block
.take
()
{
if
let
Some
(
block
)
=
self
.block
.take
()
if
self
.return_tx
.send
(
block
)
.is_err
()
{
&&
self
.return_tx
.send
(
block
)
.is_err
()
{
tracing
::
warn!
(
"block pool shutdown before block was returned"
);
tracing
::
warn!
(
"block pool shutdown before block was returned"
);
}
}
}
}
}
}
}
impl
<
S
:
Storage
,
L
:
LocalityProvider
,
M
:
BlockMetadata
>
Deref
for
MutableBlock
<
S
,
L
,
M
>
{
impl
<
S
:
Storage
,
L
:
LocalityProvider
,
M
:
BlockMetadata
>
Deref
for
MutableBlock
<
S
,
L
,
M
>
{
...
@@ -957,9 +957,9 @@ pub mod nixl {
...
@@ -957,9 +957,9 @@ pub mod nixl {
use
super
::
view
::{
BlockKind
,
Kind
,
LayerKind
};
use
super
::
view
::{
BlockKind
,
Kind
,
LayerKind
};
use
super
::
super
::{
use
super
::
super
::{
WorkerID
,
layout
::
nixl
::{
NixlLayout
,
SerializedNixlBlockLayout
},
layout
::
nixl
::{
NixlLayout
,
SerializedNixlBlockLayout
},
storage
::
nixl
::{
MemType
,
NixlRegisterableStorage
,
NixlStorage
},
storage
::
nixl
::{
MemType
,
NixlRegisterableStorage
,
NixlStorage
},
WorkerID
,
};
};
use
derive_getters
::{
Dissolve
,
Getters
};
use
derive_getters
::{
Dissolve
,
Getters
};
...
@@ -1360,9 +1360,7 @@ pub mod nixl {
...
@@ -1360,9 +1360,7 @@ pub mod nixl {
#[error(
"Input block list cannot be empty"
)]
#[error(
"Input block list cannot be empty"
)]
EmptyInput
,
EmptyInput
,
#[error(
#[error(
"Blocks in the input list are not homogeneous (worker_id, block_set_idx mismatch)"
)]
"Blocks in the input list are not homogeneous (worker_id, block_set_idx mismatch)"
)]
NotHomogeneous
,
NotHomogeneous
,
#[error(
"Serialization failed: {0}"
)]
#[error(
"Serialization failed: {0}"
)]
...
...
lib/llm/src/block_manager/block/data.rs
View file @
bce74588
...
@@ -46,7 +46,11 @@ pub trait BlockDataExt<S: Storage>: Send + Sync + 'static + std::fmt::Debug {
...
@@ -46,7 +46,11 @@ pub trait BlockDataExt<S: Storage>: Send + Sync + 'static + std::fmt::Debug {
fn
is_local_mut
(
&
mut
self
)
->
Option
<&
mut
dyn
BlockDataViews
<
S
>>
;
fn
is_local_mut
(
&
mut
self
)
->
Option
<&
mut
dyn
BlockDataViews
<
S
>>
;
/// Get a read-only view of this block's storage for a layer
/// Get a read-only view of this block's storage for a layer
fn
layer_view
(
&
self
,
layer_idx
:
usize
,
outer_idx
:
usize
)
->
BlockResult
<
view
::
LayerView
<
S
>>
{
fn
layer_view
(
&
self
,
layer_idx
:
usize
,
outer_idx
:
usize
,
)
->
BlockResult
<
view
::
LayerView
<
'_
,
S
>>
{
match
self
.is_local
()
{
match
self
.is_local
()
{
Some
(
views
)
=>
views
.local_layer_view
(
layer_idx
,
outer_idx
),
Some
(
views
)
=>
views
.local_layer_view
(
layer_idx
,
outer_idx
),
None
=>
Err
(
BlockError
::
ViewsNotAvailableOnLogicalBlocks
),
None
=>
Err
(
BlockError
::
ViewsNotAvailableOnLogicalBlocks
),
...
@@ -58,7 +62,7 @@ pub trait BlockDataExt<S: Storage>: Send + Sync + 'static + std::fmt::Debug {
...
@@ -58,7 +62,7 @@ pub trait BlockDataExt<S: Storage>: Send + Sync + 'static + std::fmt::Debug {
&
mut
self
,
&
mut
self
,
layer_idx
:
usize
,
layer_idx
:
usize
,
outer_idx
:
usize
,
outer_idx
:
usize
,
)
->
BlockResult
<
view
::
LayerViewMut
<
S
>>
{
)
->
BlockResult
<
view
::
LayerViewMut
<
'_
,
S
>>
{
match
self
.is_local_mut
()
{
match
self
.is_local_mut
()
{
Some
(
views
)
=>
views
.local_layer_view_mut
(
layer_idx
,
outer_idx
),
Some
(
views
)
=>
views
.local_layer_view_mut
(
layer_idx
,
outer_idx
),
None
=>
Err
(
BlockError
::
ViewsNotAvailableOnLogicalBlocks
),
None
=>
Err
(
BlockError
::
ViewsNotAvailableOnLogicalBlocks
),
...
@@ -66,7 +70,7 @@ pub trait BlockDataExt<S: Storage>: Send + Sync + 'static + std::fmt::Debug {
...
@@ -66,7 +70,7 @@ pub trait BlockDataExt<S: Storage>: Send + Sync + 'static + std::fmt::Debug {
}
}
/// Get a read-only view of this block's storage
/// Get a read-only view of this block's storage
fn
block_view
(
&
self
)
->
BlockResult
<
view
::
BlockView
<
S
>>
{
fn
block_view
(
&
self
)
->
BlockResult
<
view
::
BlockView
<
'_
,
S
>>
{
match
self
.is_local
()
{
match
self
.is_local
()
{
Some
(
views
)
=>
views
.local_block_view
(),
Some
(
views
)
=>
views
.local_block_view
(),
None
=>
Err
(
BlockError
::
ViewsNotAvailableOnLogicalBlocks
),
None
=>
Err
(
BlockError
::
ViewsNotAvailableOnLogicalBlocks
),
...
@@ -74,7 +78,7 @@ pub trait BlockDataExt<S: Storage>: Send + Sync + 'static + std::fmt::Debug {
...
@@ -74,7 +78,7 @@ pub trait BlockDataExt<S: Storage>: Send + Sync + 'static + std::fmt::Debug {
}
}
/// Get a mutable view of this block's storage
/// Get a mutable view of this block's storage
fn
block_view_mut
(
&
mut
self
)
->
BlockResult
<
view
::
BlockViewMut
<
S
>>
{
fn
block_view_mut
(
&
mut
self
)
->
BlockResult
<
view
::
BlockViewMut
<
'_
,
S
>>
{
match
self
.is_local_mut
()
{
match
self
.is_local_mut
()
{
Some
(
views
)
=>
views
.local_block_view_mut
(),
Some
(
views
)
=>
views
.local_block_view_mut
(),
None
=>
Err
(
BlockError
::
ViewsNotAvailableOnLogicalBlocks
),
None
=>
Err
(
BlockError
::
ViewsNotAvailableOnLogicalBlocks
),
...
@@ -88,20 +92,20 @@ pub trait BlockDataViews<S: Storage> {
...
@@ -88,20 +92,20 @@ pub trait BlockDataViews<S: Storage> {
&
self
,
&
self
,
layer_idx
:
usize
,
layer_idx
:
usize
,
outer_idx
:
usize
,
outer_idx
:
usize
,
)
->
BlockResult
<
view
::
LayerView
<
S
>>
;
)
->
BlockResult
<
view
::
LayerView
<
'_
,
S
>>
;
/// Get a mutable view of this block's storage for a layer
/// Get a mutable view of this block's storage for a layer
fn
local_layer_view_mut
(
fn
local_layer_view_mut
(
&
mut
self
,
&
mut
self
,
layer_idx
:
usize
,
layer_idx
:
usize
,
outer_idx
:
usize
,
outer_idx
:
usize
,
)
->
BlockResult
<
view
::
LayerViewMut
<
S
>>
;
)
->
BlockResult
<
view
::
LayerViewMut
<
'_
,
S
>>
;
/// Get a read-only view of this block's storage
/// Get a read-only view of this block's storage
fn
local_block_view
(
&
self
)
->
BlockResult
<
view
::
BlockView
<
S
>>
;
fn
local_block_view
(
&
self
)
->
BlockResult
<
view
::
BlockView
<
'_
,
S
>>
;
/// Get a mutable view of this block's storage
/// Get a mutable view of this block's storage
fn
local_block_view_mut
(
&
mut
self
)
->
BlockResult
<
view
::
BlockViewMut
<
S
>>
;
fn
local_block_view_mut
(
&
mut
self
)
->
BlockResult
<
view
::
BlockViewMut
<
'_
,
S
>>
;
}
}
pub
trait
BlockDataProvider
:
StorageTypeProvider
{
pub
trait
BlockDataProvider
:
StorageTypeProvider
{
...
...
lib/llm/src/block_manager/block/data/local.rs
View file @
bce74588
...
@@ -101,7 +101,7 @@ impl<S: Storage> BlockDataViews<S> for LocalBlockData<S> {
...
@@ -101,7 +101,7 @@ impl<S: Storage> BlockDataViews<S> for LocalBlockData<S> {
&
self
,
&
self
,
layer_idx
:
usize
,
layer_idx
:
usize
,
outer_idx
:
usize
,
outer_idx
:
usize
,
)
->
BlockResult
<
view
::
LayerView
<
S
>>
{
)
->
BlockResult
<
view
::
LayerView
<
'_
,
S
>>
{
let
mr
=
self
let
mr
=
self
.layout
.layout
.memory_region
(
self
.block_idx
,
layer_idx
,
outer_idx
)
?
;
.memory_region
(
self
.block_idx
,
layer_idx
,
outer_idx
)
?
;
...
@@ -113,14 +113,14 @@ impl<S: Storage> BlockDataViews<S> for LocalBlockData<S> {
...
@@ -113,14 +113,14 @@ impl<S: Storage> BlockDataViews<S> for LocalBlockData<S> {
&
mut
self
,
&
mut
self
,
layer_idx
:
usize
,
layer_idx
:
usize
,
outer_idx
:
usize
,
outer_idx
:
usize
,
)
->
BlockResult
<
view
::
LayerViewMut
<
S
>>
{
)
->
BlockResult
<
view
::
LayerViewMut
<
'_
,
S
>>
{
let
mr
=
self
let
mr
=
self
.layout
.layout
.memory_region
(
self
.block_idx
,
layer_idx
,
outer_idx
)
?
;
.memory_region
(
self
.block_idx
,
layer_idx
,
outer_idx
)
?
;
unsafe
{
view
::
LayerViewMut
::
new
(
self
,
mr
.addr
(),
mr
.size
(),
mr
.storage_type
())
}
unsafe
{
view
::
LayerViewMut
::
new
(
self
,
mr
.addr
(),
mr
.size
(),
mr
.storage_type
())
}
}
}
fn
local_block_view
(
&
self
)
->
BlockResult
<
view
::
BlockView
<
S
>>
{
fn
local_block_view
(
&
self
)
->
BlockResult
<
view
::
BlockView
<
'_
,
S
>>
{
if
self
.is_fully_contiguous
()
{
if
self
.is_fully_contiguous
()
{
let
mr
=
self
.layout
.memory_region
(
self
.block_idx
,
0
,
0
)
?
;
let
mr
=
self
.layout
.memory_region
(
self
.block_idx
,
0
,
0
)
?
;
let
offset
=
mr
.addr
();
let
offset
=
mr
.addr
();
...
@@ -134,7 +134,7 @@ impl<S: Storage> BlockDataViews<S> for LocalBlockData<S> {
...
@@ -134,7 +134,7 @@ impl<S: Storage> BlockDataViews<S> for LocalBlockData<S> {
}
}
}
}
fn
local_block_view_mut
(
&
mut
self
)
->
BlockResult
<
view
::
BlockViewMut
<
S
>>
{
fn
local_block_view_mut
(
&
mut
self
)
->
BlockResult
<
view
::
BlockViewMut
<
'_
,
S
>>
{
if
self
.is_fully_contiguous
()
{
if
self
.is_fully_contiguous
()
{
let
mr
=
self
.layout
.memory_region
(
self
.block_idx
,
0
,
0
)
?
;
let
mr
=
self
.layout
.memory_region
(
self
.block_idx
,
0
,
0
)
?
;
let
offset
=
mr
.addr
();
let
offset
=
mr
.addr
();
...
...
Prev
1
2
3
4
5
6
7
…
10
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment