Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
a1aea900
"...ssh:/git@developer.sourcefind.cn:2222/OpenDAS/dynamo.git" did not exist on "d58a68819cb73668e8f6cd74b59dfd3fa9ff63af"
Unverified
Commit
a1aea900
authored
Jun 09, 2025
by
jthomson04
Committed by
GitHub
Jun 09, 2025
Browse files
feat: KVBM prometheus monitoring (#1211)
parent
312ee8e2
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
178 additions
and
15 deletions
+178
-15
lib/llm/src/block_manager.rs
lib/llm/src/block_manager.rs
+1
-0
lib/llm/src/block_manager/config.rs
lib/llm/src/block_manager/config.rs
+4
-0
lib/llm/src/block_manager/metrics.rs
lib/llm/src/block_manager/metrics.rs
+82
-0
lib/llm/src/block_manager/offload.rs
lib/llm/src/block_manager/offload.rs
+25
-4
lib/llm/src/block_manager/pool.rs
lib/llm/src/block_manager/pool.rs
+18
-2
lib/llm/src/block_manager/pool/state.rs
lib/llm/src/block_manager/pool/state.rs
+37
-8
lib/llm/src/block_manager/state.rs
lib/llm/src/block_manager/state.rs
+11
-1
No files found.
lib/llm/src/block_manager.rs
View file @
a1aea900
...
@@ -25,6 +25,7 @@ mod state;
...
@@ -25,6 +25,7 @@ mod state;
pub
mod
block
;
pub
mod
block
;
pub
mod
events
;
pub
mod
events
;
pub
mod
layout
;
pub
mod
layout
;
pub
mod
metrics
;
pub
mod
offload
;
pub
mod
offload
;
pub
mod
pool
;
pub
mod
pool
;
pub
mod
storage
;
pub
mod
storage
;
...
...
lib/llm/src/block_manager/config.rs
View file @
a1aea900
...
@@ -15,6 +15,7 @@
...
@@ -15,6 +15,7 @@
use
super
::
events
::
EventManager
;
use
super
::
events
::
EventManager
;
use
super
::
*
;
use
super
::
*
;
use
prometheus
::
Registry
;
#[derive(Debug,
Clone)]
#[derive(Debug,
Clone)]
pub
enum
NixlOptions
{
pub
enum
NixlOptions
{
...
@@ -41,6 +42,9 @@ pub struct KvManagerRuntimeConfig {
...
@@ -41,6 +42,9 @@ pub struct KvManagerRuntimeConfig {
#[builder(default)]
#[builder(default)]
pub
async_runtime
:
Option
<
Arc
<
tokio
::
runtime
::
Runtime
>>
,
pub
async_runtime
:
Option
<
Arc
<
tokio
::
runtime
::
Runtime
>>
,
#[builder(default
=
"Arc::new(Registry::new())"
)]
pub
metrics_registry
:
Arc
<
Registry
>
,
}
}
impl
KvManagerRuntimeConfig
{
impl
KvManagerRuntimeConfig
{
...
...
lib/llm/src/block_manager/metrics.rs
0 → 100644
View file @
a1aea900
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use
anyhow
::
Result
;
use
prometheus
::{
core
::{
AtomicI64
,
AtomicU64
,
GenericCounter
,
GenericGauge
},
register_int_counter_vec_with_registry
,
register_int_gauge_vec_with_registry
,
IntCounterVec
,
IntGaugeVec
,
Opts
,
Registry
,
};
use
std
::
sync
::
Arc
;
pub
struct
BlockManagerMetrics
{
gauges
:
IntGaugeVec
,
counters
:
IntCounterVec
,
}
impl
BlockManagerMetrics
{
pub
fn
new
(
metrics_registry
:
&
Arc
<
Registry
>
)
->
Result
<
Arc
<
Self
>>
{
let
gauge_opts
=
Opts
::
new
(
"gauges"
,
"Gauges for the pools"
)
.namespace
(
"dynamo"
)
.subsystem
(
"kvbm"
);
let
counter_opts
=
Opts
::
new
(
"pools"
,
"Counters for the pools"
)
.namespace
(
"dynamo"
)
.subsystem
(
"kvbm"
);
let
gauges
=
register_int_gauge_vec_with_registry!
(
gauge_opts
,
&
[
"pool"
,
"metric_type"
],
metrics_registry
)
?
;
let
counters
=
register_int_counter_vec_with_registry!
(
counter_opts
,
&
[
"pool"
,
"metric_type"
],
metrics_registry
)
?
;
Ok
(
Arc
::
new
(
Self
{
gauges
,
counters
}))
}
pub
fn
pool
(
self
:
&
Arc
<
Self
>
,
group
:
&
str
)
->
Arc
<
PoolMetrics
>
{
PoolMetrics
::
new
(
self
,
group
)
}
}
pub
struct
PoolMetrics
{
block_manager_metrics
:
Arc
<
BlockManagerMetrics
>
,
group
:
String
,
}
impl
PoolMetrics
{
pub
fn
new
(
block_manager_metrics
:
&
Arc
<
BlockManagerMetrics
>
,
group
:
&
str
)
->
Arc
<
Self
>
{
Arc
::
new
(
Self
{
block_manager_metrics
:
block_manager_metrics
.clone
(),
group
:
group
.to_string
(),
})
}
pub
fn
gauge
(
&
self
,
metric_type
:
&
str
)
->
GenericGauge
<
AtomicI64
>
{
self
.block_manager_metrics
.gauges
.with_label_values
(
&
[
&
self
.group
,
&
metric_type
.to_string
()])
}
pub
fn
counter
(
&
self
,
metric_type
:
&
str
)
->
GenericCounter
<
AtomicU64
>
{
self
.block_manager_metrics
.counters
.with_label_values
(
&
[
&
self
.group
,
&
metric_type
.to_string
()])
}
}
lib/llm/src/block_manager/offload.rs
View file @
a1aea900
...
@@ -42,9 +42,10 @@
...
@@ -42,9 +42,10 @@
//! The [`OffloadManager::onboard_worker`] is responsible for onboarding blocks.
//! The [`OffloadManager::onboard_worker`] is responsible for onboarding blocks.
//!
//!
//! The kind of offloads/onboards they perform is dictated by the source and target arguments
//! The kind of offloads/onboards they perform is dictated by the source and target arguments
//! of the [`OffloadManager::offload`] and [`OffloadManager::onboard`] methods.
//! of the [`OffloadManager::offload
_worker
`] and [`OffloadManager::onboard
_worker
`] methods.
use
super
::
block
::{
BlockError
,
BlockMetadata
,
BlockState
,
ImmutableBlock
,
TransferContext
};
use
super
::
block
::{
BlockError
,
BlockMetadata
,
BlockState
,
ImmutableBlock
,
TransferContext
};
use
super
::
metrics
::{
BlockManagerMetrics
,
PoolMetrics
};
use
super
::
pool
::
BlockPoolError
;
use
super
::
pool
::
BlockPoolError
;
use
super
::
storage
::{
Cuda
,
Storage
};
use
super
::
storage
::{
Cuda
,
Storage
};
use
super
::{
BlockPool
,
DeviceStorage
,
DiskStorage
,
PinnedStorage
};
use
super
::{
BlockPool
,
DeviceStorage
,
DiskStorage
,
PinnedStorage
};
...
@@ -101,6 +102,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
...
@@ -101,6 +102,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
device
:
Option
<
Arc
<
BlockPool
<
DeviceStorage
,
Metadata
>>>
,
device
:
Option
<
Arc
<
BlockPool
<
DeviceStorage
,
Metadata
>>>
,
nixl_agent
:
Arc
<
Option
<
NixlAgent
>>
,
nixl_agent
:
Arc
<
Option
<
NixlAgent
>>
,
async_rt_handle
:
Handle
,
async_rt_handle
:
Handle
,
metrics
:
Arc
<
BlockManagerMetrics
>
,
cancellation_token
:
CancellationToken
,
cancellation_token
:
CancellationToken
,
)
->
Result
<
Arc
<
Self
>>
{
)
->
Result
<
Arc
<
Self
>>
{
let
(
device_offload_tx
,
device_offload_rx
)
=
mpsc
::
unbounded_channel
();
let
(
device_offload_tx
,
device_offload_rx
)
=
mpsc
::
unbounded_channel
();
...
@@ -120,8 +122,6 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
...
@@ -120,8 +122,6 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
tick
:
Arc
::
new
(
Mutex
::
new
(
0
)),
tick
:
Arc
::
new
(
Mutex
::
new
(
0
)),
});
});
let
this_clone
=
this
.clone
();
let
cuda_ctx
=
Cuda
::
device_or_create
(
0
)
?
;
let
cuda_ctx
=
Cuda
::
device_or_create
(
0
)
?
;
// We want cuda offloads to happen in parallel with host onboards, so we need to use a different stream.
// We want cuda offloads to happen in parallel with host onboards, so we need to use a different stream.
...
@@ -147,6 +147,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
...
@@ -147,6 +147,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
&
async_rt_handle
,
&
async_rt_handle
,
cancellation_token
.clone
(),
cancellation_token
.clone
(),
)),
)),
metrics
.pool
(
"device"
),
cancellation_token
.clone
(),
cancellation_token
.clone
(),
);
);
CriticalTaskExecutionHandle
::
new_with_runtime
(
CriticalTaskExecutionHandle
::
new_with_runtime
(
...
@@ -179,6 +180,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
...
@@ -179,6 +180,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
&
async_rt_handle
,
&
async_rt_handle
,
cancellation_token
.clone
(),
cancellation_token
.clone
(),
)),
)),
metrics
.pool
(
"host"
),
cancellation_token
.clone
(),
cancellation_token
.clone
(),
);
);
CriticalTaskExecutionHandle
::
new_with_runtime
(
CriticalTaskExecutionHandle
::
new_with_runtime
(
...
@@ -205,6 +207,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
...
@@ -205,6 +207,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
&
async_rt_handle
,
&
async_rt_handle
,
cancellation_token
.clone
(),
cancellation_token
.clone
(),
)),
)),
metrics
.pool
(
"host"
),
cancellation_token
.clone
(),
cancellation_token
.clone
(),
);
);
CriticalTaskExecutionHandle
::
new_with_runtime
(
CriticalTaskExecutionHandle
::
new_with_runtime
(
...
@@ -231,6 +234,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
...
@@ -231,6 +234,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
&
async_rt_handle
,
&
async_rt_handle
,
cancellation_token
.clone
(),
cancellation_token
.clone
(),
)),
)),
metrics
.pool
(
"disk"
),
cancellation_token
.clone
(),
cancellation_token
.clone
(),
);
);
CriticalTaskExecutionHandle
::
new_with_runtime
(
CriticalTaskExecutionHandle
::
new_with_runtime
(
...
@@ -241,7 +245,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
...
@@ -241,7 +245,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
)
?
)
?
.detach
();
.detach
();
Ok
(
this
_clone
)
Ok
(
this
)
}
}
async
fn
offload_worker
<
Source
:
Storage
,
Target
:
Storage
>
(
async
fn
offload_worker
<
Source
:
Storage
,
Target
:
Storage
>
(
...
@@ -249,6 +253,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
...
@@ -249,6 +253,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
target_pool
:
Option
<
Arc
<
BlockPool
<
Target
,
Metadata
>>>
,
target_pool
:
Option
<
Arc
<
BlockPool
<
Target
,
Metadata
>>>
,
mut
offload_rx
:
mpsc
::
UnboundedReceiver
<
OffloadRequest
<
Source
,
Metadata
>>
,
mut
offload_rx
:
mpsc
::
UnboundedReceiver
<
OffloadRequest
<
Source
,
Metadata
>>
,
transfer_manager
:
Arc
<
dyn
TransferManager
<
Source
,
Target
,
Metadata
>>
,
transfer_manager
:
Arc
<
dyn
TransferManager
<
Source
,
Target
,
Metadata
>>
,
pool_metrics
:
Arc
<
PoolMetrics
>
,
cancellation_token
:
CancellationToken
,
cancellation_token
:
CancellationToken
,
)
->
Result
<
()
>
{
)
->
Result
<
()
>
{
if
source_pool
.is_none
()
||
target_pool
.is_none
()
{
if
source_pool
.is_none
()
||
target_pool
.is_none
()
{
...
@@ -270,6 +275,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
...
@@ -270,6 +275,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
match
offload_rx
.try_recv
()
{
match
offload_rx
.try_recv
()
{
Ok
(
request
)
=>
{
Ok
(
request
)
=>
{
queue
.insert
(
request
);
queue
.insert
(
request
);
pool_metrics
.gauge
(
"offload_queue_size"
)
.inc
();
}
}
Err
(
TryRecvError
::
Empty
)
=>
{
Err
(
TryRecvError
::
Empty
)
=>
{
break
;
break
;
...
@@ -280,6 +286,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
...
@@ -280,6 +286,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
// If there is a request, process it.
// If there is a request, process it.
if
let
Some
(
request
)
=
queue
.pop_first
()
{
if
let
Some
(
request
)
=
queue
.pop_first
()
{
pool_metrics
.gauge
(
"offload_queue_size"
)
.dec
();
// Try to upgrade the block to a strong reference.
// Try to upgrade the block to a strong reference.
let
block
=
match
request
.block
.upgrade
()
{
let
block
=
match
request
.block
.upgrade
()
{
Some
(
block
)
=>
Some
(
block
),
Some
(
block
)
=>
Some
(
block
),
...
@@ -311,6 +318,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
...
@@ -311,6 +318,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
};
};
if
let
Some
(
target_block
)
=
target_blocks
.into_iter
()
.next
()
{
if
let
Some
(
target_block
)
=
target_blocks
.into_iter
()
.next
()
{
pool_metrics
.counter
(
"offload_processed"
)
.inc
();
transfer_manager
transfer_manager
.enqueue_transfer
(
PendingTransfer
::
new
(
.enqueue_transfer
(
PendingTransfer
::
new
(
vec!
[
block
],
vec!
[
block
],
...
@@ -327,6 +335,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
...
@@ -327,6 +335,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
_
=
cancellation_token
.cancelled
()
=>
return
Ok
(()),
_
=
cancellation_token
.cancelled
()
=>
return
Ok
(()),
Some
(
request
)
=
offload_rx
.recv
()
=>
{
Some
(
request
)
=
offload_rx
.recv
()
=>
{
queue
.insert
(
request
);
queue
.insert
(
request
);
pool_metrics
.gauge
(
"offload_queue_size"
)
.inc
();
}
}
}
}
}
}
...
@@ -338,6 +347,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
...
@@ -338,6 +347,7 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
target_pool
:
Option
<
Arc
<
BlockPool
<
Target
,
Metadata
>>>
,
target_pool
:
Option
<
Arc
<
BlockPool
<
Target
,
Metadata
>>>
,
mut
onboard_rx
:
mpsc
::
UnboundedReceiver
<
OnboardRequest
<
Source
,
Target
,
Metadata
>>
,
mut
onboard_rx
:
mpsc
::
UnboundedReceiver
<
OnboardRequest
<
Source
,
Target
,
Metadata
>>
,
transfer_manager
:
Arc
<
dyn
TransferManager
<
Source
,
Target
,
Metadata
>>
,
transfer_manager
:
Arc
<
dyn
TransferManager
<
Source
,
Target
,
Metadata
>>
,
pool_metrics
:
Arc
<
PoolMetrics
>
,
cancellation_token
:
CancellationToken
,
cancellation_token
:
CancellationToken
,
)
->
Result
<
()
>
{
)
->
Result
<
()
>
{
if
source_pool
.is_none
()
||
target_pool
.is_none
()
{
if
source_pool
.is_none
()
||
target_pool
.is_none
()
{
...
@@ -349,6 +359,11 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
...
@@ -349,6 +359,11 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
tokio
::
select!
{
tokio
::
select!
{
_
=
cancellation_token
.cancelled
()
=>
return
Ok
::
<
(),
anyhow
::
Error
>
(()),
_
=
cancellation_token
.cancelled
()
=>
return
Ok
::
<
(),
anyhow
::
Error
>
(()),
Some
(
request
)
=
onboard_rx
.recv
()
=>
{
Some
(
request
)
=
onboard_rx
.recv
()
=>
{
pool_metrics
.gauge
(
"onboard_queue_size"
)
.set
(
onboard_rx
.len
()
as
i64
);
// Try to allocate blocks on the device.
// Try to allocate blocks on the device.
let
target_blocks
=
match
target_pool
.allocate_blocks
(
request
.blocks
.len
())
.await
{
let
target_blocks
=
match
target_pool
.allocate_blocks
(
request
.blocks
.len
())
.await
{
Ok
(
blocks
)
=>
blocks
,
Ok
(
blocks
)
=>
blocks
,
...
@@ -358,6 +373,10 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
...
@@ -358,6 +373,10 @@ impl<Metadata: BlockMetadata> OffloadManager<Metadata> {
}
}
};
};
pool_metrics
.counter
(
"onboard_processed"
)
.inc_by
(
request
.blocks
.len
()
as
u64
);
let
sources
=
request
let
sources
=
request
.blocks
.blocks
.iter
()
.iter
()
...
@@ -535,6 +554,7 @@ mod tests {
...
@@ -535,6 +554,7 @@ mod tests {
use
aligned_vec
::
avec
;
use
aligned_vec
::
avec
;
use
cudarc
::
runtime
::
sys
::{
cudaMemcpy
,
cudaMemcpyKind
,
cudaMemset
};
use
cudarc
::
runtime
::
sys
::{
cudaMemcpy
,
cudaMemcpyKind
,
cudaMemset
};
use
prometheus
::
Registry
;
use
std
::
fs
::
File
;
use
std
::
fs
::
File
;
use
std
::
io
::{
Read
,
Seek
,
SeekFrom
,
Write
};
use
std
::
io
::{
Read
,
Seek
,
SeekFrom
,
Write
};
use
std
::
mem
::
ManuallyDrop
;
use
std
::
mem
::
ManuallyDrop
;
...
@@ -621,6 +641,7 @@ mod tests {
...
@@ -621,6 +641,7 @@ mod tests {
device_pool
.clone
(),
device_pool
.clone
(),
agent_arc
,
agent_arc
,
async_rt_handle
,
async_rt_handle
,
BlockManagerMetrics
::
new
(
&
Arc
::
new
(
Registry
::
new
()))
?
,
CancellationToken
::
new
(),
CancellationToken
::
new
(),
)
?
;
)
?
;
...
...
lib/llm/src/block_manager/pool.rs
View file @
a1aea900
...
@@ -73,10 +73,12 @@ use super::block::{
...
@@ -73,10 +73,12 @@ use super::block::{
GlobalRegistry
,
GlobalRegistry
,
};
};
use
super
::
events
::{
EventManager
,
NullEventManager
};
use
super
::
events
::{
EventManager
,
NullEventManager
};
use
super
::
metrics
::{
BlockManagerMetrics
,
PoolMetrics
};
use
super
::
storage
::
Storage
;
use
super
::
storage
::
Storage
;
use
crate
::
tokens
::{
SequenceHash
,
TokenBlock
};
use
crate
::
tokens
::{
SequenceHash
,
TokenBlock
};
use
prometheus
::
Registry
;
use
std
::{
use
std
::{
collections
::{
BTreeSet
,
HashMap
,
VecDeque
},
collections
::{
BTreeSet
,
HashMap
,
VecDeque
},
sync
::{
Arc
,
Weak
},
sync
::{
Arc
,
Weak
},
...
@@ -124,12 +126,18 @@ pub struct BlockPoolArgs<S: Storage, M: BlockMetadata> {
...
@@ -124,12 +126,18 @@ pub struct BlockPoolArgs<S: Storage, M: BlockMetadata> {
#[builder(default
=
"Handle::current()"
)]
#[builder(default
=
"Handle::current()"
)]
async_runtime
:
Handle
,
async_runtime
:
Handle
,
#[builder(
default
=
"BlockManagerMetrics::new(&Arc::new(Registry::new())).unwrap().pool(
\"
pool
\"
)"
)]
pool_metrics
:
Arc
<
PoolMetrics
>
,
}
}
impl
<
S
:
Storage
,
M
:
BlockMetadata
>
BlockPoolArgsBuilder
<
S
,
M
>
{
impl
<
S
:
Storage
,
M
:
BlockMetadata
>
BlockPoolArgsBuilder
<
S
,
M
>
{
pub
fn
build
(
self
)
->
anyhow
::
Result
<
BlockPool
<
S
,
M
>>
{
pub
fn
build
(
self
)
->
anyhow
::
Result
<
BlockPool
<
S
,
M
>>
{
let
args
=
self
.build_internal
()
?
;
let
args
=
self
.build_internal
()
?
;
let
(
event_manager
,
cancel_token
,
blocks
,
global_registry
,
async_runtime
)
=
args
.dissolve
();
let
(
event_manager
,
cancel_token
,
blocks
,
global_registry
,
async_runtime
,
metrics
)
=
args
.dissolve
();
tracing
::
info!
(
"building block pool"
);
tracing
::
info!
(
"building block pool"
);
let
pool
=
BlockPool
::
new
(
let
pool
=
BlockPool
::
new
(
...
@@ -138,6 +146,7 @@ impl<S: Storage, M: BlockMetadata> BlockPoolArgsBuilder<S, M> {
...
@@ -138,6 +146,7 @@ impl<S: Storage, M: BlockMetadata> BlockPoolArgsBuilder<S, M> {
blocks
,
blocks
,
global_registry
,
global_registry
,
async_runtime
,
async_runtime
,
metrics
,
);
);
Ok
(
pool
)
Ok
(
pool
)
...
@@ -216,6 +225,7 @@ impl<S: Storage, M: BlockMetadata> BlockPool<S, M> {
...
@@ -216,6 +225,7 @@ impl<S: Storage, M: BlockMetadata> BlockPool<S, M> {
blocks
:
Vec
<
Block
<
S
,
M
>>
,
blocks
:
Vec
<
Block
<
S
,
M
>>
,
global_registry
:
GlobalRegistry
,
global_registry
:
GlobalRegistry
,
async_runtime
:
Handle
,
async_runtime
:
Handle
,
metrics
:
Arc
<
PoolMetrics
>
,
)
->
Self
{
)
->
Self
{
let
(
pool
,
progress_engine
)
=
Self
::
with_progress_engine
(
let
(
pool
,
progress_engine
)
=
Self
::
with_progress_engine
(
event_manager
,
event_manager
,
...
@@ -223,6 +233,7 @@ impl<S: Storage, M: BlockMetadata> BlockPool<S, M> {
...
@@ -223,6 +233,7 @@ impl<S: Storage, M: BlockMetadata> BlockPool<S, M> {
blocks
,
blocks
,
global_registry
,
global_registry
,
async_runtime
,
async_runtime
,
metrics
,
);
);
// pool.runtime.handle().spawn(async move {
// pool.runtime.handle().spawn(async move {
...
@@ -262,6 +273,7 @@ impl<S: Storage, M: BlockMetadata> BlockPool<S, M> {
...
@@ -262,6 +273,7 @@ impl<S: Storage, M: BlockMetadata> BlockPool<S, M> {
blocks
:
Vec
<
Block
<
S
,
M
>>
,
blocks
:
Vec
<
Block
<
S
,
M
>>
,
global_registry
:
GlobalRegistry
,
global_registry
:
GlobalRegistry
,
async_runtime
:
Handle
,
async_runtime
:
Handle
,
metrics
:
Arc
<
PoolMetrics
>
,
)
->
(
Self
,
ProgressEngine
<
S
,
M
>
)
{
)
->
(
Self
,
ProgressEngine
<
S
,
M
>
)
{
let
(
priority_tx
,
priority_rx
)
=
tokio
::
sync
::
mpsc
::
unbounded_channel
();
let
(
priority_tx
,
priority_rx
)
=
tokio
::
sync
::
mpsc
::
unbounded_channel
();
let
(
ctrl_tx
,
ctrl_rx
)
=
tokio
::
sync
::
mpsc
::
unbounded_channel
();
let
(
ctrl_tx
,
ctrl_rx
)
=
tokio
::
sync
::
mpsc
::
unbounded_channel
();
...
@@ -274,6 +286,7 @@ impl<S: Storage, M: BlockMetadata> BlockPool<S, M> {
...
@@ -274,6 +286,7 @@ impl<S: Storage, M: BlockMetadata> BlockPool<S, M> {
blocks
,
blocks
,
global_registry
,
global_registry
,
async_runtime
,
async_runtime
,
metrics
,
);
);
(
(
...
@@ -474,6 +487,7 @@ struct State<S: Storage, M: BlockMetadata> {
...
@@ -474,6 +487,7 @@ struct State<S: Storage, M: BlockMetadata> {
registry
:
BlockRegistry
,
registry
:
BlockRegistry
,
return_tx
:
tokio
::
sync
::
mpsc
::
UnboundedSender
<
Block
<
S
,
M
>>
,
return_tx
:
tokio
::
sync
::
mpsc
::
UnboundedSender
<
Block
<
S
,
M
>>
,
event_manager
:
Arc
<
dyn
EventManager
>
,
event_manager
:
Arc
<
dyn
EventManager
>
,
metrics
:
Arc
<
PoolMetrics
>
,
}
}
struct
ProgressEngine
<
S
:
Storage
,
M
:
BlockMetadata
>
{
struct
ProgressEngine
<
S
:
Storage
,
M
:
BlockMetadata
>
{
...
@@ -482,6 +496,7 @@ struct ProgressEngine<S: Storage, M: BlockMetadata> {
...
@@ -482,6 +496,7 @@ struct ProgressEngine<S: Storage, M: BlockMetadata> {
cancel_token
:
CancellationToken
,
cancel_token
:
CancellationToken
,
state
:
State
<
S
,
M
>
,
state
:
State
<
S
,
M
>
,
return_rx
:
tokio
::
sync
::
mpsc
::
UnboundedReceiver
<
Block
<
S
,
M
>>
,
return_rx
:
tokio
::
sync
::
mpsc
::
UnboundedReceiver
<
Block
<
S
,
M
>>
,
metrics
:
Arc
<
PoolMetrics
>
,
}
}
#[cfg(test)]
#[cfg(test)]
...
@@ -498,7 +513,7 @@ mod tests {
...
@@ -498,7 +513,7 @@ mod tests {
self
,
self
,
)
->
anyhow
::
Result
<
(
BlockPool
<
S
,
M
>
,
ProgressEngine
<
S
,
M
>
)
>
{
)
->
anyhow
::
Result
<
(
BlockPool
<
S
,
M
>
,
ProgressEngine
<
S
,
M
>
)
>
{
let
args
=
self
.build_internal
()
?
;
let
args
=
self
.build_internal
()
?
;
let
(
event_manager
,
cancel_token
,
blocks
,
global_registry
,
async_runtime
)
=
let
(
event_manager
,
cancel_token
,
blocks
,
global_registry
,
async_runtime
,
metrics
)
=
args
.dissolve
();
args
.dissolve
();
let
(
pool
,
progress_engine
)
=
BlockPool
::
with_progress_engine
(
let
(
pool
,
progress_engine
)
=
BlockPool
::
with_progress_engine
(
event_manager
,
event_manager
,
...
@@ -506,6 +521,7 @@ mod tests {
...
@@ -506,6 +521,7 @@ mod tests {
blocks
,
blocks
,
global_registry
,
global_registry
,
async_runtime
,
async_runtime
,
metrics
,
);
);
Ok
((
pool
,
progress_engine
))
Ok
((
pool
,
progress_engine
))
...
...
lib/llm/src/block_manager/pool/state.rs
View file @
a1aea900
...
@@ -26,6 +26,7 @@ impl<S: Storage, M: BlockMetadata> State<S, M> {
...
@@ -26,6 +26,7 @@ impl<S: Storage, M: BlockMetadata> State<S, M> {
return_tx
:
tokio
::
sync
::
mpsc
::
UnboundedSender
<
Block
<
S
,
M
>>
,
return_tx
:
tokio
::
sync
::
mpsc
::
UnboundedSender
<
Block
<
S
,
M
>>
,
global_registry
:
GlobalRegistry
,
global_registry
:
GlobalRegistry
,
async_runtime
:
Handle
,
async_runtime
:
Handle
,
metrics
:
Arc
<
PoolMetrics
>
,
)
->
Self
{
)
->
Self
{
Self
{
Self
{
active
:
ActiveBlockPool
::
new
(),
active
:
ActiveBlockPool
::
new
(),
...
@@ -33,6 +34,7 @@ impl<S: Storage, M: BlockMetadata> State<S, M> {
...
@@ -33,6 +34,7 @@ impl<S: Storage, M: BlockMetadata> State<S, M> {
registry
:
BlockRegistry
::
new
(
event_manager
.clone
(),
global_registry
,
async_runtime
),
registry
:
BlockRegistry
::
new
(
event_manager
.clone
(),
global_registry
,
async_runtime
),
return_tx
,
return_tx
,
event_manager
,
event_manager
,
metrics
,
}
}
}
}
...
@@ -126,6 +128,10 @@ impl<S: Storage, M: BlockMetadata> State<S, M> {
...
@@ -126,6 +128,10 @@ impl<S: Storage, M: BlockMetadata> State<S, M> {
}
}
}
}
self
.metrics
.counter
(
"blocks_allocated"
)
.inc_by
(
count
as
u64
);
Ok
(
blocks
)
Ok
(
blocks
)
}
}
...
@@ -195,6 +201,10 @@ impl<S: Storage, M: BlockMetadata> State<S, M> {
...
@@ -195,6 +201,10 @@ impl<S: Storage, M: BlockMetadata> State<S, M> {
assert_eq!
(
immutable_blocks
.len
(),
expected_len
);
assert_eq!
(
immutable_blocks
.len
(),
expected_len
);
self
.metrics
.counter
(
"blocks_registered"
)
.inc_by
(
immutable_blocks
.len
()
as
u64
);
Ok
(
immutable_blocks
)
Ok
(
immutable_blocks
)
}
}
...
@@ -204,9 +214,9 @@ impl<S: Storage, M: BlockMetadata> State<S, M> {
...
@@ -204,9 +214,9 @@ impl<S: Storage, M: BlockMetadata> State<S, M> {
return_rx
:
&
mut
tokio
::
sync
::
mpsc
::
UnboundedReceiver
<
Block
<
S
,
M
>>
,
return_rx
:
&
mut
tokio
::
sync
::
mpsc
::
UnboundedReceiver
<
Block
<
S
,
M
>>
,
)
->
Vec
<
ImmutableBlock
<
S
,
M
>>
{
)
->
Vec
<
ImmutableBlock
<
S
,
M
>>
{
let
mut
immutable_blocks
=
Vec
::
new
();
let
mut
immutable_blocks
=
Vec
::
new
();
for
sequence_hash
in
sequence_hashes
{
for
sequence_hash
in
&
sequence_hashes
{
if
!
self
.registry
.is_registered
(
sequence_hash
)
{
if
!
self
.registry
.is_registered
(
*
sequence_hash
)
{
re
turn
immutable_blocks
;
b
re
ak
;
}
}
// the block is registered, so to get it from either the:
// the block is registered, so to get it from either the:
...
@@ -214,16 +224,17 @@ impl<S: Storage, M: BlockMetadata> State<S, M> {
...
@@ -214,16 +224,17 @@ impl<S: Storage, M: BlockMetadata> State<S, M> {
// 2. inactive pool
// 2. inactive pool
// 3. return channel
// 3. return channel
if
let
Some
(
immutable
)
=
self
.active
.match_sequence_hash
(
sequence_hash
)
{
if
let
Some
(
immutable
)
=
self
.active
.match_sequence_hash
(
*
sequence_hash
)
{
immutable_blocks
.push
(
immutable
);
immutable_blocks
.push
(
immutable
);
continue
;
continue
;
}
}
let
raw_block
=
let
raw_block
=
if
let
Some
(
raw_block
)
=
self
.inactive
.match_sequence_hash
(
sequence_hash
)
{
if
let
Some
(
raw_block
)
=
self
.inactive
.match_sequence_hash
(
*
sequence_hash
)
{
raw_block
raw_block
}
else
{
}
else
{
self
.wait_for_returned_block
(
sequence_hash
,
return_rx
)
.await
self
.wait_for_returned_block
(
*
sequence_hash
,
return_rx
)
.await
};
};
// this assert allows us to skip the error checking on the active pool registration step
// this assert allows us to skip the error checking on the active pool registration step
...
@@ -239,6 +250,13 @@ impl<S: Storage, M: BlockMetadata> State<S, M> {
...
@@ -239,6 +250,13 @@ impl<S: Storage, M: BlockMetadata> State<S, M> {
immutable_blocks
.push
(
immutable
);
immutable_blocks
.push
(
immutable
);
}
}
self
.metrics
.counter
(
"cache_hits"
)
.inc_by
(
immutable_blocks
.len
()
as
u64
);
self
.metrics
.counter
(
"cache_misses"
)
.inc_by
(
sequence_hashes
.len
()
as
u64
-
immutable_blocks
.len
()
as
u64
);
immutable_blocks
immutable_blocks
}
}
...
@@ -254,6 +272,7 @@ impl<S: Storage, M: BlockMetadata> State<S, M> {
...
@@ -254,6 +272,7 @@ impl<S: Storage, M: BlockMetadata> State<S, M> {
}
}
impl
<
S
:
Storage
,
M
:
BlockMetadata
>
ProgressEngine
<
S
,
M
>
{
impl
<
S
:
Storage
,
M
:
BlockMetadata
>
ProgressEngine
<
S
,
M
>
{
#[allow(clippy::too_many_arguments)]
pub
fn
new
(
pub
fn
new
(
event_manager
:
Arc
<
dyn
EventManager
>
,
event_manager
:
Arc
<
dyn
EventManager
>
,
priority_rx
:
tokio
::
sync
::
mpsc
::
UnboundedReceiver
<
PriorityRequest
<
S
,
M
>>
,
priority_rx
:
tokio
::
sync
::
mpsc
::
UnboundedReceiver
<
PriorityRequest
<
S
,
M
>>
,
...
@@ -262,10 +281,16 @@ impl<S: Storage, M: BlockMetadata> ProgressEngine<S, M> {
...
@@ -262,10 +281,16 @@ impl<S: Storage, M: BlockMetadata> ProgressEngine<S, M> {
blocks
:
Vec
<
Block
<
S
,
M
>>
,
blocks
:
Vec
<
Block
<
S
,
M
>>
,
global_registry
:
GlobalRegistry
,
global_registry
:
GlobalRegistry
,
async_runtime
:
Handle
,
async_runtime
:
Handle
,
metrics
:
Arc
<
PoolMetrics
>
,
)
->
Self
{
)
->
Self
{
let
(
return_tx
,
return_rx
)
=
tokio
::
sync
::
mpsc
::
unbounded_channel
();
let
(
return_tx
,
return_rx
)
=
tokio
::
sync
::
mpsc
::
unbounded_channel
();
let
mut
state
=
let
mut
state
=
State
::
<
S
,
M
>
::
new
(
State
::
<
S
,
M
>
::
new
(
event_manager
,
return_tx
,
global_registry
,
async_runtime
);
event_manager
,
return_tx
,
global_registry
,
async_runtime
,
metrics
.clone
(),
);
tracing
::
debug!
(
count
=
blocks
.len
(),
"adding blocks to inactive pool"
);
tracing
::
debug!
(
count
=
blocks
.len
(),
"adding blocks to inactive pool"
);
state
.inactive
.add_blocks
(
blocks
);
state
.inactive
.add_blocks
(
blocks
);
...
@@ -276,6 +301,7 @@ impl<S: Storage, M: BlockMetadata> ProgressEngine<S, M> {
...
@@ -276,6 +301,7 @@ impl<S: Storage, M: BlockMetadata> ProgressEngine<S, M> {
cancel_token
,
cancel_token
,
state
,
state
,
return_rx
,
return_rx
,
metrics
,
}
}
}
}
...
@@ -284,14 +310,17 @@ impl<S: Storage, M: BlockMetadata> ProgressEngine<S, M> {
...
@@ -284,14 +310,17 @@ impl<S: Storage, M: BlockMetadata> ProgressEngine<S, M> {
biased
;
biased
;
Some
(
priority_req
)
=
self
.priority_rx
.recv
(),
if
!
self
.priority_rx
.is_closed
()
=>
{
Some
(
priority_req
)
=
self
.priority_rx
.recv
(),
if
!
self
.priority_rx
.is_closed
()
=>
{
self
.metrics
.gauge
(
"priority_request_queue_size"
)
.set
(
self
.priority_rx
.len
()
as
i64
);
self
.state
.handle_priority_request
(
priority_req
,
&
mut
self
.return_rx
)
.await
;
self
.state
.handle_priority_request
(
priority_req
,
&
mut
self
.return_rx
)
.await
;
}
}
Some
(
req
)
=
self
.ctrl_rx
.recv
(),
if
!
self
.ctrl_rx
.is_closed
()
=>
{
Some
(
req
)
=
self
.ctrl_rx
.recv
(),
if
!
self
.ctrl_rx
.is_closed
()
=>
{
self
.metrics
.gauge
(
"control_request_queue_size"
)
.set
(
self
.ctrl_rx
.len
()
as
i64
);
self
.state
.handle_control_request
(
req
);
self
.state
.handle_control_request
(
req
);
}
}
Some
(
block
)
=
self
.return_rx
.recv
()
=>
{
Some
(
block
)
=
self
.return_rx
.recv
()
=>
{
self
.metrics
.gauge
(
"return_block_queue_size"
)
.set
(
self
.return_rx
.len
()
as
i64
);
self
.state
.handle_return_block
(
block
);
self
.state
.handle_return_block
(
block
);
}
}
...
...
lib/llm/src/block_manager/state.rs
View file @
a1aea900
...
@@ -20,6 +20,7 @@ use super::{
...
@@ -20,6 +20,7 @@ use super::{
block
::{
Block
,
GlobalRegistry
,
ImmutableBlock
},
block
::{
Block
,
GlobalRegistry
,
ImmutableBlock
},
config
::
NixlOptions
,
config
::
NixlOptions
,
events
::{
EventManager
,
NullEventManager
},
events
::{
EventManager
,
NullEventManager
},
metrics
::{
BlockManagerMetrics
,
PoolMetrics
},
};
};
use
std
::
sync
::
Arc
;
use
std
::
sync
::
Arc
;
use
tokio
::
runtime
::
Handle
;
use
tokio
::
runtime
::
Handle
;
...
@@ -58,6 +59,9 @@ impl<Metadata: BlockMetadata> KvBlockManagerState<Metadata> {
...
@@ -58,6 +59,9 @@ impl<Metadata: BlockMetadata> KvBlockManagerState<Metadata> {
let
mut
nixl_backends
:
HashMap
<
String
,
Arc
<
nixl_sys
::
Backend
>>
=
HashMap
::
new
();
let
mut
nixl_backends
:
HashMap
<
String
,
Arc
<
nixl_sys
::
Backend
>>
=
HashMap
::
new
();
let
global_registry
=
GlobalRegistry
::
default
();
let
global_registry
=
GlobalRegistry
::
default
();
let
metrics
=
BlockManagerMetrics
::
new
(
&
config
.runtime.metrics_registry
)
?
;
let
event_manager
=
config
let
event_manager
=
config
.event_manager
.event_manager
.clone
()
.clone
()
...
@@ -135,6 +139,7 @@ impl<Metadata: BlockMetadata> KvBlockManagerState<Metadata> {
...
@@ -135,6 +139,7 @@ impl<Metadata: BlockMetadata> KvBlockManagerState<Metadata> {
worker_id
,
worker_id
,
global_registry
.clone
(),
global_registry
.clone
(),
async_rt_handle
.clone
(),
async_rt_handle
.clone
(),
metrics
.pool
(
"disk"
),
Some
(
event_manager
.clone
()),
Some
(
event_manager
.clone
()),
)
?
;
)
?
;
(
Some
(
Arc
::
new
(
pool
)),
Some
(
blocks
))
(
Some
(
Arc
::
new
(
pool
)),
Some
(
blocks
))
...
@@ -158,6 +163,7 @@ impl<Metadata: BlockMetadata> KvBlockManagerState<Metadata> {
...
@@ -158,6 +163,7 @@ impl<Metadata: BlockMetadata> KvBlockManagerState<Metadata> {
worker_id
,
worker_id
,
global_registry
.clone
(),
global_registry
.clone
(),
async_rt_handle
.clone
(),
async_rt_handle
.clone
(),
metrics
.pool
(
"host"
),
Some
(
event_manager
.clone
()),
Some
(
event_manager
.clone
()),
)
?
;
)
?
;
(
Some
(
Arc
::
new
(
pool
)),
Some
(
blocks
))
(
Some
(
Arc
::
new
(
pool
)),
Some
(
blocks
))
...
@@ -180,6 +186,7 @@ impl<Metadata: BlockMetadata> KvBlockManagerState<Metadata> {
...
@@ -180,6 +186,7 @@ impl<Metadata: BlockMetadata> KvBlockManagerState<Metadata> {
worker_id
,
worker_id
,
global_registry
.clone
(),
global_registry
.clone
(),
async_rt_handle
.clone
(),
async_rt_handle
.clone
(),
metrics
.pool
(
"device"
),
Some
(
event_manager
.clone
()),
Some
(
event_manager
.clone
()),
)
?
;
)
?
;
(
Some
(
Arc
::
new
(
pool
)),
Some
(
blocks
))
(
Some
(
Arc
::
new
(
pool
)),
Some
(
blocks
))
...
@@ -200,6 +207,7 @@ impl<Metadata: BlockMetadata> KvBlockManagerState<Metadata> {
...
@@ -200,6 +207,7 @@ impl<Metadata: BlockMetadata> KvBlockManagerState<Metadata> {
device_pool
.clone
(),
device_pool
.clone
(),
nixl_agent
.clone
(),
nixl_agent
.clone
(),
async_rt_handle
,
async_rt_handle
,
metrics
.clone
(),
cancellation_token
.clone
(),
cancellation_token
.clone
(),
)
?
;
)
?
;
...
@@ -475,7 +483,7 @@ fn create_layout<S: Storage + NixlRegisterableStorage>(
...
@@ -475,7 +483,7 @@ fn create_layout<S: Storage + NixlRegisterableStorage>(
anyhow
::
bail!
(
"failed to create layout"
);
anyhow
::
bail!
(
"failed to create layout"
);
}
}
#[expect(clippy::type_complexity)]
#[expect(clippy::type_complexity
,
clippy::too_many_arguments
)]
fn
create_block_pool
<
S
:
Storage
+
NixlRegisterableStorage
,
M
:
BlockMetadata
>
(
fn
create_block_pool
<
S
:
Storage
+
NixlRegisterableStorage
,
M
:
BlockMetadata
>
(
layout
:
Arc
<
dyn
NixlLayout
<
StorageType
=
S
>>
,
layout
:
Arc
<
dyn
NixlLayout
<
StorageType
=
S
>>
,
block_set_idx
:
usize
,
block_set_idx
:
usize
,
...
@@ -483,6 +491,7 @@ fn create_block_pool<S: Storage + NixlRegisterableStorage, M: BlockMetadata>(
...
@@ -483,6 +491,7 @@ fn create_block_pool<S: Storage + NixlRegisterableStorage, M: BlockMetadata>(
worker_id
:
WorkerID
,
worker_id
:
WorkerID
,
global_registry
:
GlobalRegistry
,
global_registry
:
GlobalRegistry
,
async_runtime
:
Handle
,
async_runtime
:
Handle
,
pool_metrics
:
Arc
<
PoolMetrics
>
,
event_manager
:
Option
<
Arc
<
dyn
EventManager
>>
,
event_manager
:
Option
<
Arc
<
dyn
EventManager
>>
,
)
->
Result
<
(
BlockPool
<
S
,
M
>
,
Vec
<
Block
<
S
,
M
>>
)
>
{
)
->
Result
<
(
BlockPool
<
S
,
M
>
,
Vec
<
Block
<
S
,
M
>>
)
>
{
let
blocks
=
block
::
layout_to_blocks
::
<
_
,
M
>
(
layout
,
block_set_idx
,
worker_id
)
?
;
let
blocks
=
block
::
layout_to_blocks
::
<
_
,
M
>
(
layout
,
block_set_idx
,
worker_id
)
?
;
...
@@ -491,6 +500,7 @@ fn create_block_pool<S: Storage + NixlRegisterableStorage, M: BlockMetadata>(
...
@@ -491,6 +500,7 @@ fn create_block_pool<S: Storage + NixlRegisterableStorage, M: BlockMetadata>(
.cancel_token
(
cancellation_token
)
.cancel_token
(
cancellation_token
)
.global_registry
(
global_registry
)
.global_registry
(
global_registry
)
.async_runtime
(
async_runtime
)
.async_runtime
(
async_runtime
)
.pool_metrics
(
pool_metrics
)
.event_manager
(
event_manager
)
.event_manager
(
event_manager
)
.build
()
?
;
.build
()
?
;
Ok
((
pool
,
blocks
))
Ok
((
pool
,
blocks
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment