Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
a1aea900
Unverified
Commit
a1aea900
authored
Jun 09, 2025
by
jthomson04
Committed by
GitHub
Jun 09, 2025
Browse files
feat: KVBM prometheus monitoring (#1211)
parent
312ee8e2
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
178 additions
and
15 deletions
+178
-15
lib/llm/src/block_manager.rs
lib/llm/src/block_manager.rs
+1
-0
lib/llm/src/block_manager/config.rs
lib/llm/src/block_manager/config.rs
+4
-0
lib/llm/src/block_manager/metrics.rs
lib/llm/src/block_manager/metrics.rs
+82
-0
lib/llm/src/block_manager/offload.rs
lib/llm/src/block_manager/offload.rs
+25
-4
lib/llm/src/block_manager/pool.rs
lib/llm/src/block_manager/pool.rs
+18
-2
lib/llm/src/block_manager/pool/state.rs
lib/llm/src/block_manager/pool/state.rs
+37
-8
lib/llm/src/block_manager/state.rs
lib/llm/src/block_manager/state.rs
+11
-1
No files found.
lib/llm/src/block_manager.rs
View file @
a1aea900
...
...
@@ -25,6 +25,7 @@ mod state;
pub
mod
block
;
pub
mod
events
;
pub
mod
layout
;
pub
mod
metrics
;
pub
mod
offload
;
pub
mod
pool
;
pub
mod
storage
;
...
...
lib/llm/src/block_manager/config.rs
View file @
a1aea900
...
...
@@ -15,6 +15,7 @@
use
super
::
events
::
EventManager
;
use
super
::
*
;
use
prometheus
::
Registry
;
#[derive(Debug,
Clone)]
pub
enum
NixlOptions
{
...
...
@@ -41,6 +42,9 @@ pub struct KvManagerRuntimeConfig {
#[builder(default)]
pub
async_runtime
:
Option
<
Arc
<
tokio
::
runtime
::
Runtime
>>
,
#[builder(default
=
"Arc::new(Registry::new())"
)]
pub
metrics_registry
:
Arc
<
Registry
>
,
}
impl
KvManagerRuntimeConfig
{
...
...
lib/llm/src/block_manager/metrics.rs
0 → 100644
View file @
a1aea900
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use
anyhow
::
Result
;
use
prometheus
::{
core
::{
AtomicI64
,
AtomicU64
,
GenericCounter
,
GenericGauge
},
register_int_counter_vec_with_registry
,
register_int_gauge_vec_with_registry
,
IntCounterVec
,
IntGaugeVec
,
Opts
,
Registry
,
};
use
std
::
sync
::
Arc
;
pub
struct
BlockManagerMetrics
{
gauges
:
IntGaugeVec
,
counters
:
IntCounterVec
,
}
impl
BlockManagerMetrics
{
pub
fn
new
(
metrics_registry
:
&
Arc
<
Registry
>
)
->
Result
<
Arc
<
Self
>>
{
let
gauge_opts
=
Opts
::
new
(
"gauges"
,
"Gauges for the pools"
)
.namespace
(
"dynamo"
)
.subsystem
(
"kvbm"
);
let
counter_opts
=
Opts
::
new
(
"pools"
,
"Counters for the pools"
)
.namespace
(
"dynamo"
)
.subsystem
(
"kvbm"
);
let
gauges
=
register_int_gauge_vec_with_registry!
(
gauge_opts
,
&
[
"pool"
,
"metric_type"
],
metrics_registry
)
?
;
let
counters
=
register_int_counter_vec_with_registry!
(
counter_opts
,
&
[
"pool"
,
"metric_type"
],
metrics_registry
)
?
;
Ok
(
Arc
::
new
(
Self
{
gauges
,
counters
}))
}
pub
fn
pool
(
self
:
&
Arc
<
Self
>
,
group
:
&
str
)
->
Arc
<
PoolMetrics
>
{
PoolMetrics
::
new
(
self
,
group
)
}
}
pub
struct
PoolMetrics
{
block_manager_metrics
:
Arc
<
BlockManagerMetrics
>
,
group
:
String
,
}
impl
PoolMetrics
{
pub
fn
new
(
block_manager_metrics
:
&
Arc
<
BlockManagerMetrics
>
,
group
:
&
str
)
->
Arc
<
Self
>
{
Arc
::
new
(
Self
{
block_manager_metrics
:
block_manager_metrics
.clone
(),
group
:
group
.to_string
(),
})
}
pub
fn
gauge
(
&
self
,
metric_type
:
&
str
)
->
GenericGauge
<
AtomicI64
>
{
self
.block_manager_metrics
.gauges
.with_label_values
(
&
[
&
self
.group
,
&
metric_type
.to_string
()])
}
pub
fn
counter
(
&
self
,
metric_type
:
&
str
)
->
GenericCounter
<
AtomicU64
>
{
self
.block_manager_metrics
.counters
.with_label_values
(
&
[
&
self
.group
,
&
metric_type
.to_string
()])
}
}
lib/llm/src/block_manager/offload.rs
View file @
a1aea900
...
...
@@ -42,9 +42,10 @@
//! The [`OffloadManager::onboard_worker`] is responsible for onboarding blocks.
//!
//! The kind of offloads/onboards they perform is dictated by the source and target arguments
//! of the [`OffloadManager::offload`] and [`OffloadManager::onboard`] methods.
//! of the [`OffloadManager::offload
_worker
`] and [`OffloadManager::onboard
_worker
`] methods.
use
super
::
block
::{
BlockError
,
BlockMetadata
,
BlockState
,
ImmutableBlock
,
TransferContext
};
use
super
::
metrics
::{
BlockManagerMetrics
,
PoolMetrics
};
use
super
::
pool
::
BlockPoolError
;
use
super
::
storage
::{
Cuda
,
Storage
};
use
super
::{
BlockPool
,
DeviceStorage
,
DiskStorage
,
PinnedStorage
};
...
...
@@ -101,6 +102,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
device
:
Option
<
Arc
<
BlockPool
<
DeviceStorage
,
Metadata
>>>
,
nixl_agent
:
Arc
<
Option
<
NixlAgent
>>
,
async_rt_handle
:
Handle
,
metrics
:
Arc
<
BlockManagerMetrics
>
,
cancellation_token
:
CancellationToken
,
)
->
Result
<
Arc
<
Self
>>
{
let
(
device_offload_tx
,
device_offload_rx
)
=
mpsc
::
unbounded_channel
();
...
...
@@ -120,8 +122,6 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
tick
:
Arc
::
new
(
Mutex
::
new
(
0
)),
});
let
this_clone
=
this
.clone
();
let
cuda_ctx
=
Cuda
::
device_or_create
(
0
)
?
;
// We want cuda offloads to happen in parallel with host onboards, so we need to use a different stream.
...
...
@@ -147,6 +147,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
&
async_rt_handle
,
cancellation_token
.clone
(),
)),
metrics
.pool
(
"device"
),
cancellation_token
.clone
(),
);
CriticalTaskExecutionHandle
::
new_with_runtime
(
...
...
@@ -179,6 +180,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
&
async_rt_handle
,
cancellation_token
.clone
(),
)),
metrics
.pool
(
"host"
),
cancellation_token
.clone
(),
);
CriticalTaskExecutionHandle
::
new_with_runtime
(
...
...
@@ -205,6 +207,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
&
async_rt_handle
,
cancellation_token
.clone
(),
)),
metrics
.pool
(
"host"
),
cancellation_token
.clone
(),
);
CriticalTaskExecutionHandle
::
new_with_runtime
(
...
...
@@ -231,6 +234,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
&
async_rt_handle
,
cancellation_token
.clone
(),
)),
metrics
.pool
(
"disk"
),
cancellation_token
.clone
(),
);
CriticalTaskExecutionHandle
::
new_with_runtime
(
...
...
@@ -241,7 +245,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
)
?
.detach
();
Ok
(
this
_clone
)
Ok
(
this
)
}
async
fn
offload_worker
<
Source
:
Storage
,
Target
:
Storage
>
(
...
...
@@ -249,6 +253,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
target_pool
:
Option
<
Arc
<
BlockPool
<
Target
,
Metadata
>>>
,
mut
offload_rx
:
mpsc
::
UnboundedReceiver
<
OffloadRequest
<
Source
,
Metadata
>>
,
transfer_manager
:
Arc
<
dyn
TransferManager
<
Source
,
Target
,
Metadata
>>
,
pool_metrics
:
Arc
<
PoolMetrics
>
,
cancellation_token
:
CancellationToken
,
)
->
Result
<
()
>
{
if
source_pool
.is_none
()
||
target_pool
.is_none
()
{
...
...
@@ -270,6 +275,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
match
offload_rx
.try_recv
()
{
Ok
(
request
)
=>
{
queue
.insert
(
request
);
pool_metrics
.gauge
(
"offload_queue_size"
)
.inc
();
}
Err
(
TryRecvError
::
Empty
)
=>
{
break
;
...
...
@@ -280,6 +286,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
// If there is a request, process it.
if
let
Some
(
request
)
=
queue
.pop_first
()
{
pool_metrics
.gauge
(
"offload_queue_size"
)
.dec
();
// Try to upgrade the block to a strong reference.
let
block
=
match
request
.block
.upgrade
()
{
Some
(
block
)
=>
Some
(
block
),
...
...
@@ -311,6 +318,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
};
if
let
Some
(
target_block
)
=
target_blocks
.into_iter
()
.next
()
{
pool_metrics
.counter
(
"offload_processed"
)
.inc
();
transfer_manager
.enqueue_transfer
(
PendingTransfer
::
new
(
vec!
[
block
],
...
...
@@ -327,6 +335,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
_
=
cancellation_token
.cancelled
()
=>
return
Ok
(()),
Some
(
request
)
=
offload_rx
.recv
()
=>
{
queue
.insert
(
request
);
pool_metrics
.gauge
(
"offload_queue_size"
)
.inc
();
}
}
}
...
...
@@ -338,6 +347,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
target_pool
:
Option
<
Arc
<
BlockPool
<
Target
,
Metadata
>>>
,
mut
onboard_rx
:
mpsc
::
UnboundedReceiver
<
OnboardRequest
<
Source
,
Target
,
Metadata
>>
,
transfer_manager
:
Arc
<
dyn
TransferManager
<
Source
,
Target
,
Metadata
>>
,
pool_metrics
:
Arc
<
PoolMetrics
>
,
cancellation_token
:
CancellationToken
,
)
->
Result
<
()
>
{
if
source_pool
.is_none
()
||
target_pool
.is_none
()
{
...
...
@@ -349,6 +359,11 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
tokio
::
select!
{
_
=
cancellation_token
.cancelled
()
=>
return
Ok
::
<
(),
anyhow
::
Error
>
(()),
Some
(
request
)
=
onboard_rx
.recv
()
=>
{
pool_metrics
.gauge
(
"onboard_queue_size"
)
.set
(
onboard_rx
.len
()
as
i64
);
// Try to allocate blocks on the device.
let
target_blocks
=
match
target_pool
.allocate_blocks
(
request
.blocks
.len
())
.await
{
Ok
(
blocks
)
=>
blocks
,
...
...
@@ -358,6 +373,10 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
}
};
pool_metrics
.counter
(
"onboard_processed"
)
.inc_by
(
request
.blocks
.len
()
as
u64
);
let
sources
=
request
.blocks
.iter
()
...
...
@@ -535,6 +554,7 @@ mod tests {
use
aligned_vec
::
avec
;
use
cudarc
::
runtime
::
sys
::{
cudaMemcpy
,
cudaMemcpyKind
,
cudaMemset
};
use
prometheus
::
Registry
;
use
std
::
fs
::
File
;
use
std
::
io
::{
Read
,
Seek
,
SeekFrom
,
Write
};
use
std
::
mem
::
ManuallyDrop
;
...
...
@@ -621,6 +641,7 @@ mod tests {
device_pool
.clone
(),
agent_arc
,
async_rt_handle
,
BlockManagerMetrics
::
new
(
&
Arc
::
new
(
Registry
::
new
()))
?
,
CancellationToken
::
new
(),
)
?
;
...
...
lib/llm/src/block_manager/pool.rs
View file @
a1aea900
...
...
@@ -73,10 +73,12 @@ use super::block::{
GlobalRegistry
,
};
use
super
::
events
::{
EventManager
,
NullEventManager
};
use
super
::
metrics
::{
BlockManagerMetrics
,
PoolMetrics
};
use
super
::
storage
::
Storage
;
use
crate
::
tokens
::{
SequenceHash
,
TokenBlock
};
use
prometheus
::
Registry
;
use
std
::{
collections
::{
BTreeSet
,
HashMap
,
VecDeque
},
sync
::{
Arc
,
Weak
},
...
...
@@ -124,12 +126,18 @@ pub struct BlockPoolArgs<S: Storage, M: BlockMetadata> {
#[builder(default
=
"Handle::current()"
)]
async_runtime
:
Handle
,
#[builder(
default
=
"BlockManagerMetrics::new(&Arc::new(Registry::new())).unwrap().pool(
\"
pool
\"
)"
)]
pool_metrics
:
Arc
<
PoolMetrics
>
,
}
impl
<
S
:
Storage
,
M
:
BlockMetadata
>
BlockPoolArgsBuilder
<
S
,
M
>
{
pub
fn
build
(
self
)
->
anyhow
::
Result
<
BlockPool
<
S
,
M
>>
{
let
args
=
self
.build_internal
()
?
;
let
(
event_manager
,
cancel_token
,
blocks
,
global_registry
,
async_runtime
)
=
args
.dissolve
();
let
(
event_manager
,
cancel_token
,
blocks
,
global_registry
,
async_runtime
,
metrics
)
=
args
.dissolve
();
tracing
::
info!
(
"building block pool"
);
let
pool
=
BlockPool
::
new
(
...
...
@@ -138,6 +146,7 @@ impl<S: Storage, M: BlockMetadata> BlockPoolArgsBuilder<S, M> {
blocks
,
global_registry
,
async_runtime
,
metrics
,
);
Ok
(
pool
)
...
...
@@ -216,6 +225,7 @@ impl<S: Storage, M: BlockMetadata> BlockPool<S, M> {
blocks
:
Vec
<
Block
<
S
,
M
>>
,
global_registry
:
GlobalRegistry
,
async_runtime
:
Handle
,
metrics
:
Arc
<
PoolMetrics
>
,
)
->
Self
{
let
(
pool
,
progress_engine
)
=
Self
::
with_progress_engine
(
event_manager
,
...
...
@@ -223,6 +233,7 @@ impl<S: Storage, M: BlockMetadata> BlockPool<S, M> {
blocks
,
global_registry
,
async_runtime
,
metrics
,
);
// pool.runtime.handle().spawn(async move {
...
...
@@ -262,6 +273,7 @@ impl<S: Storage, M: BlockMetadata> BlockPool<S, M> {
blocks
:
Vec
<
Block
<
S
,
M
>>
,
global_registry
:
GlobalRegistry
,
async_runtime
:
Handle
,
metrics
:
Arc
<
PoolMetrics
>
,
)
->
(
Self
,
ProgressEngine
<
S
,
M
>
)
{
let
(
priority_tx
,
priority_rx
)
=
tokio
::
sync
::
mpsc
::
unbounded_channel
();
let
(
ctrl_tx
,
ctrl_rx
)
=
tokio
::
sync
::
mpsc
::
unbounded_channel
();
...
...
@@ -274,6 +286,7 @@ impl<S: Storage, M: BlockMetadata> BlockPool<S, M> {
blocks
,
global_registry
,
async_runtime
,
metrics
,
);
(
...
...
@@ -474,6 +487,7 @@ struct State<S: Storage, M: BlockMetadata> {
registry
:
BlockRegistry
,
return_tx
:
tokio
::
sync
::
mpsc
::
UnboundedSender
<
Block
<
S
,
M
>>
,
event_manager
:
Arc
<
dyn
EventManager
>
,
metrics
:
Arc
<
PoolMetrics
>
,
}
struct
ProgressEngine
<
S
:
Storage
,
M
:
BlockMetadata
>
{
...
...
@@ -482,6 +496,7 @@ struct ProgressEngine<S: Storage, M: BlockMetadata> {
cancel_token
:
CancellationToken
,
state
:
State
<
S
,
M
>
,
return_rx
:
tokio
::
sync
::
mpsc
::
UnboundedReceiver
<
Block
<
S
,
M
>>
,
metrics
:
Arc
<
PoolMetrics
>
,
}
#[cfg(test)]
...
...
@@ -498,7 +513,7 @@ mod tests {
self
,
)
->
anyhow
::
Result
<
(
BlockPool
<
S
,
M
>
,
ProgressEngine
<
S
,
M
>
)
>
{
let
args
=
self
.build_internal
()
?
;
let
(
event_manager
,
cancel_token
,
blocks
,
global_registry
,
async_runtime
)
=
let
(
event_manager
,
cancel_token
,
blocks
,
global_registry
,
async_runtime
,
metrics
)
=
args
.dissolve
();
let
(
pool
,
progress_engine
)
=
BlockPool
::
with_progress_engine
(
event_manager
,
...
...
@@ -506,6 +521,7 @@ mod tests {
blocks
,
global_registry
,
async_runtime
,
metrics
,
);
Ok
((
pool
,
progress_engine
))
...
...
lib/llm/src/block_manager/pool/state.rs
View file @
a1aea900
...
...
@@ -26,6 +26,7 @@ impl<S: Storage, M: BlockMetadata> State<S, M> {
return_tx
:
tokio
::
sync
::
mpsc
::
UnboundedSender
<
Block
<
S
,
M
>>
,
global_registry
:
GlobalRegistry
,
async_runtime
:
Handle
,
metrics
:
Arc
<
PoolMetrics
>
,
)
->
Self
{
Self
{
active
:
ActiveBlockPool
::
new
(),
...
...
@@ -33,6 +34,7 @@ impl<S: Storage, M: BlockMetadata> State<S, M> {
registry
:
BlockRegistry
::
new
(
event_manager
.clone
(),
global_registry
,
async_runtime
),
return_tx
,
event_manager
,
metrics
,
}
}
...
...
@@ -126,6 +128,10 @@ impl<S: Storage, M: BlockMetadata> State<S, M> {
}
}
self
.metrics
.counter
(
"blocks_allocated"
)
.inc_by
(
count
as
u64
);
Ok
(
blocks
)
}
...
...
@@ -195,6 +201,10 @@ impl<S: Storage, M: BlockMetadata> State<S, M> {
assert_eq!
(
immutable_blocks
.len
(),
expected_len
);
self
.metrics
.counter
(
"blocks_registered"
)
.inc_by
(
immutable_blocks
.len
()
as
u64
);
Ok
(
immutable_blocks
)
}
...
...
@@ -204,9 +214,9 @@ impl<S: Storage, M: BlockMetadata> State<S, M> {
return_rx
:
&
mut
tokio
::
sync
::
mpsc
::
UnboundedReceiver
<
Block
<
S
,
M
>>
,
)
->
Vec
<
ImmutableBlock
<
S
,
M
>>
{
let
mut
immutable_blocks
=
Vec
::
new
();
for
sequence_hash
in
sequence_hashes
{
if
!
self
.registry
.is_registered
(
sequence_hash
)
{
re
turn
immutable_blocks
;
for
sequence_hash
in
&
sequence_hashes
{
if
!
self
.registry
.is_registered
(
*
sequence_hash
)
{
b
re
ak
;
}
// the block is registered, so to get it from either the:
...
...
@@ -214,16 +224,17 @@ impl<S: Storage, M: BlockMetadata> State<S, M> {
// 2. inactive pool
// 3. return channel
if
let
Some
(
immutable
)
=
self
.active
.match_sequence_hash
(
sequence_hash
)
{
if
let
Some
(
immutable
)
=
self
.active
.match_sequence_hash
(
*
sequence_hash
)
{
immutable_blocks
.push
(
immutable
);
continue
;
}
let
raw_block
=
if
let
Some
(
raw_block
)
=
self
.inactive
.match_sequence_hash
(
sequence_hash
)
{
if
let
Some
(
raw_block
)
=
self
.inactive
.match_sequence_hash
(
*
sequence_hash
)
{
raw_block
}
else
{
self
.wait_for_returned_block
(
sequence_hash
,
return_rx
)
.await
self
.wait_for_returned_block
(
*
sequence_hash
,
return_rx
)
.await
};
// this assert allows us to skip the error checking on the active pool registration step
...
...
@@ -239,6 +250,13 @@ impl<S: Storage, M: BlockMetadata> State<S, M> {
immutable_blocks
.push
(
immutable
);
}
self
.metrics
.counter
(
"cache_hits"
)
.inc_by
(
immutable_blocks
.len
()
as
u64
);
self
.metrics
.counter
(
"cache_misses"
)
.inc_by
(
sequence_hashes
.len
()
as
u64
-
immutable_blocks
.len
()
as
u64
);
immutable_blocks
}
...
...
@@ -254,6 +272,7 @@ impl<S: Storage, M: BlockMetadata> State<S, M> {
}
impl
<
S
:
Storage
,
M
:
BlockMetadata
>
ProgressEngine
<
S
,
M
>
{
#[allow(clippy::too_many_arguments)]
pub
fn
new
(
event_manager
:
Arc
<
dyn
EventManager
>
,
priority_rx
:
tokio
::
sync
::
mpsc
::
UnboundedReceiver
<
PriorityRequest
<
S
,
M
>>
,
...
...
@@ -262,10 +281,16 @@ impl<S: Storage, M: BlockMetadata> ProgressEngine<S, M> {
blocks
:
Vec
<
Block
<
S
,
M
>>
,
global_registry
:
GlobalRegistry
,
async_runtime
:
Handle
,
metrics
:
Arc
<
PoolMetrics
>
,
)
->
Self
{
let
(
return_tx
,
return_rx
)
=
tokio
::
sync
::
mpsc
::
unbounded_channel
();
let
mut
state
=
State
::
<
S
,
M
>
::
new
(
event_manager
,
return_tx
,
global_registry
,
async_runtime
);
let
mut
state
=
State
::
<
S
,
M
>
::
new
(
event_manager
,
return_tx
,
global_registry
,
async_runtime
,
metrics
.clone
(),
);
tracing
::
debug!
(
count
=
blocks
.len
(),
"adding blocks to inactive pool"
);
state
.inactive
.add_blocks
(
blocks
);
...
...
@@ -276,6 +301,7 @@ impl<S: Storage, M: BlockMetadata> ProgressEngine<S, M> {
cancel_token
,
state
,
return_rx
,
metrics
,
}
}
...
...
@@ -284,14 +310,17 @@ impl<S: Storage, M: BlockMetadata> ProgressEngine<S, M> {
biased
;
Some
(
priority_req
)
=
self
.priority_rx
.recv
(),
if
!
self
.priority_rx
.is_closed
()
=>
{
self
.metrics
.gauge
(
"priority_request_queue_size"
)
.set
(
self
.priority_rx
.len
()
as
i64
);
self
.state
.handle_priority_request
(
priority_req
,
&
mut
self
.return_rx
)
.await
;
}
Some
(
req
)
=
self
.ctrl_rx
.recv
(),
if
!
self
.ctrl_rx
.is_closed
()
=>
{
self
.metrics
.gauge
(
"control_request_queue_size"
)
.set
(
self
.ctrl_rx
.len
()
as
i64
);
self
.state
.handle_control_request
(
req
);
}
Some
(
block
)
=
self
.return_rx
.recv
()
=>
{
self
.metrics
.gauge
(
"return_block_queue_size"
)
.set
(
self
.return_rx
.len
()
as
i64
);
self
.state
.handle_return_block
(
block
);
}
...
...
lib/llm/src/block_manager/state.rs
View file @
a1aea900
...
...
@@ -20,6 +20,7 @@ use super::{
block
::{
Block
,
GlobalRegistry
,
ImmutableBlock
},
config
::
NixlOptions
,
events
::{
EventManager
,
NullEventManager
},
metrics
::{
BlockManagerMetrics
,
PoolMetrics
},
};
use
std
::
sync
::
Arc
;
use
tokio
::
runtime
::
Handle
;
...
...
@@ -58,6 +59,9 @@ impl<Metadata: BlockMetadata> KvBlockManagerState<Metadata> {
let
mut
nixl_backends
:
HashMap
<
String
,
Arc
<
nixl_sys
::
Backend
>>
=
HashMap
::
new
();
let
global_registry
=
GlobalRegistry
::
default
();
let
metrics
=
BlockManagerMetrics
::
new
(
&
config
.runtime.metrics_registry
)
?
;
let
event_manager
=
config
.event_manager
.clone
()
...
...
@@ -135,6 +139,7 @@ impl<Metadata: BlockMetadata> KvBlockManagerState<Metadata> {
worker_id
,
global_registry
.clone
(),
async_rt_handle
.clone
(),
metrics
.pool
(
"disk"
),
Some
(
event_manager
.clone
()),
)
?
;
(
Some
(
Arc
::
new
(
pool
)),
Some
(
blocks
))
...
...
@@ -158,6 +163,7 @@ impl<Metadata: BlockMetadata> KvBlockManagerState<Metadata> {
worker_id
,
global_registry
.clone
(),
async_rt_handle
.clone
(),
metrics
.pool
(
"host"
),
Some
(
event_manager
.clone
()),
)
?
;
(
Some
(
Arc
::
new
(
pool
)),
Some
(
blocks
))
...
...
@@ -180,6 +186,7 @@ impl<Metadata: BlockMetadata> KvBlockManagerState<Metadata> {
worker_id
,
global_registry
.clone
(),
async_rt_handle
.clone
(),
metrics
.pool
(
"device"
),
Some
(
event_manager
.clone
()),
)
?
;
(
Some
(
Arc
::
new
(
pool
)),
Some
(
blocks
))
...
...
@@ -200,6 +207,7 @@ impl<Metadata: BlockMetadata> KvBlockManagerState<Metadata> {
device_pool
.clone
(),
nixl_agent
.clone
(),
async_rt_handle
,
metrics
.clone
(),
cancellation_token
.clone
(),
)
?
;
...
...
@@ -475,7 +483,7 @@ fn create_layout<S: Storage + NixlRegisterableStorage>(
anyhow
::
bail!
(
"failed to create layout"
);
}
#[expect(clippy::type_complexity)]
#[expect(clippy::type_complexity
,
clippy::too_many_arguments
)]
fn
create_block_pool
<
S
:
Storage
+
NixlRegisterableStorage
,
M
:
BlockMetadata
>
(
layout
:
Arc
<
dyn
NixlLayout
<
StorageType
=
S
>>
,
block_set_idx
:
usize
,
...
...
@@ -483,6 +491,7 @@ fn create_block_pool<S: Storage + NixlRegisterableStorage, M: BlockMetadata>(
worker_id
:
WorkerID
,
global_registry
:
GlobalRegistry
,
async_runtime
:
Handle
,
pool_metrics
:
Arc
<
PoolMetrics
>
,
event_manager
:
Option
<
Arc
<
dyn
EventManager
>>
,
)
->
Result
<
(
BlockPool
<
S
,
M
>
,
Vec
<
Block
<
S
,
M
>>
)
>
{
let
blocks
=
block
::
layout_to_blocks
::
<
_
,
M
>
(
layout
,
block_set_idx
,
worker_id
)
?
;
...
...
@@ -491,6 +500,7 @@ fn create_block_pool<S: Storage + NixlRegisterableStorage, M: BlockMetadata>(
.cancel_token
(
cancellation_token
)
.global_registry
(
global_registry
)
.async_runtime
(
async_runtime
)
.pool_metrics
(
pool_metrics
)
.event_manager
(
event_manager
)
.build
()
?
;
Ok
((
pool
,
blocks
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment