Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
096699c4
Unverified
Commit
096699c4
authored
Dec 23, 2025
by
Patrick
Committed by
GitHub
Dec 23, 2025
Browse files
feat: KVBM Object Support - add runtime vars for object (#5063)
Signed-off-by:
Patrick Riel
<
priel@nvidia.com
>
parent
22199857
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
129 additions
and
5 deletions
+129
-5
lib/bindings/kvbm/src/block_manager/vllm/connector/leader/slot.rs
...ings/kvbm/src/block_manager/vllm/connector/leader/slot.rs
+1
-1
lib/llm/src/block_manager/metrics_kvbm.rs
lib/llm/src/block_manager/metrics_kvbm.rs
+76
-4
lib/runtime/src/config/environment_names.rs
lib/runtime/src/config/environment_names.rs
+31
-0
lib/runtime/src/metrics/prometheus_names.rs
lib/runtime/src/metrics/prometheus_names.rs
+21
-0
No files found.
lib/bindings/kvbm/src/block_manager/vllm/connector/leader/slot.rs
View file @
096699c4
...
...
@@ -214,7 +214,7 @@ impl<R: RequestKey> ConnectorSlotManager<R> {
// Update Prometheus metrics
let
host_rate
=
cache_stats_clone
.host_hit_rate
();
let
disk_rate
=
cache_stats_clone
.disk_hit_rate
();
kvbm_metrics_clone
.update_cache_hit_rates
(
host_rate
,
disk_rate
);
kvbm_metrics_clone
.update_cache_hit_rates
(
host_rate
,
disk_rate
,
0.0
);
// Also log cache hit rates periodically
cache_stats_clone
.maybe_log
();
}
...
...
lib/llm/src/block_manager/metrics_kvbm.rs
View file @
096699c4
...
...
@@ -4,8 +4,10 @@
use
axum
::
Router
;
use
dynamo_runtime
::
metrics
::
prometheus_names
::{
kvbm
::{
DISK_CACHE_HIT_RATE
,
HOST_CACHE_HIT_RATE
,
MATCHED_TOKENS
,
OFFLOAD_BLOCKS_D2D
,
OFFLOAD_BLOCKS_D2H
,
OFFLOAD_BLOCKS_H2D
,
ONBOARD_BLOCKS_D2D
,
ONBOARD_BLOCKS_H2D
,
DISK_CACHE_HIT_RATE
,
HOST_CACHE_HIT_RATE
,
MATCHED_TOKENS
,
OBJECT_CACHE_HIT_RATE
,
OBJECT_READ_FAILURES
,
OBJECT_WRITE_FAILURES
,
OFFLOAD_BLOCKS_D2D
,
OFFLOAD_BLOCKS_D2H
,
OFFLOAD_BLOCKS_D2O
,
OFFLOAD_BLOCKS_H2D
,
ONBOARD_BLOCKS_D2D
,
ONBOARD_BLOCKS_H2D
,
ONBOARD_BLOCKS_O2D
,
},
sanitize_prometheus_name
,
};
...
...
@@ -26,12 +28,18 @@ pub struct KvbmMetrics {
// number of blocks offloaded from device to disk (bypassing host memory)
pub
offload_blocks_d2d
:
IntCounter
,
// number of blocks offloaded from device to object storage
pub
offload_blocks_d2o
:
IntCounter
,
// number of blocks onboarded from host to device
pub
onboard_blocks_h2d
:
IntCounter
,
// number of blocks onboarded from disk to device
pub
onboard_blocks_d2d
:
IntCounter
,
// number of blocks onboarded from object storage to device
pub
onboard_blocks_o2d
:
IntCounter
,
// number of matched tokens from KVBM
pub
matched_tokens
:
IntCounter
,
...
...
@@ -41,6 +49,15 @@ pub struct KvbmMetrics {
// disk cache hit rate (0.0-1.0) from the sliding window
pub
disk_cache_hit_rate
:
Gauge
,
// object cache hit rate (0.0-1.0) from the sliding window
pub
object_cache_hit_rate
:
Gauge
,
// number of failed object storage read operations (blocks)
pub
object_read_failures
:
IntCounter
,
// number of failed object storage write operations (blocks)
pub
object_write_failures
:
IntCounter
,
shutdown_notify
:
Option
<
Arc
<
Notify
>>
,
}
...
...
@@ -70,6 +87,13 @@ impl KvbmMetrics {
&
[],
)
.unwrap
();
let
offload_blocks_d2o
=
mr
.create_intcounter
(
OFFLOAD_BLOCKS_D2O
,
"The number of offload blocks from device to object storage"
,
&
[],
)
.unwrap
();
let
onboard_blocks_h2d
=
mr
.create_intcounter
(
ONBOARD_BLOCKS_H2D
,
...
...
@@ -84,6 +108,14 @@ impl KvbmMetrics {
&
[],
)
.unwrap
();
let
onboard_blocks_o2d
=
mr
.create_intcounter
(
ONBOARD_BLOCKS_O2D
,
"The number of onboard blocks from object storage to device"
,
&
[],
)
.unwrap
();
let
matched_tokens
=
mr
.create_intcounter
(
MATCHED_TOKENS
,
"The number of matched tokens"
,
&
[])
.unwrap
();
...
...
@@ -101,18 +133,43 @@ impl KvbmMetrics {
&
[],
)
.unwrap
();
let
object_cache_hit_rate
=
mr
.create_gauge
(
OBJECT_CACHE_HIT_RATE
,
"Object storage cache hit rate (0.0-1.0) from the sliding window"
,
&
[],
)
.unwrap
();
let
object_read_failures
=
mr
.create_intcounter
(
OBJECT_READ_FAILURES
,
"The number of failed object storage read operations (blocks)"
,
&
[],
)
.unwrap
();
let
object_write_failures
=
mr
.create_intcounter
(
OBJECT_WRITE_FAILURES
,
"The number of failed object storage write operations (blocks)"
,
&
[],
)
.unwrap
();
// early return if no endpoint is needed
if
!
create_endpoint
{
return
Self
{
offload_blocks_d2h
,
offload_blocks_h2d
,
offload_blocks_d2d
,
offload_blocks_d2o
,
onboard_blocks_h2d
,
onboard_blocks_d2d
,
onboard_blocks_o2d
,
matched_tokens
,
host_cache_hit_rate
,
disk_cache_hit_rate
,
object_cache_hit_rate
,
object_read_failures
,
object_write_failures
,
shutdown_notify
:
None
,
};
}
...
...
@@ -164,19 +221,34 @@ impl KvbmMetrics {
offload_blocks_d2h
,
offload_blocks_h2d
,
offload_blocks_d2d
,
offload_blocks_d2o
,
onboard_blocks_h2d
,
onboard_blocks_d2d
,
onboard_blocks_o2d
,
matched_tokens
,
host_cache_hit_rate
,
disk_cache_hit_rate
,
object_cache_hit_rate
,
object_read_failures
,
object_write_failures
,
shutdown_notify
:
Some
(
notify
),
}
}
/// Update cache hit rate metrics from a CacheStatsTracker
pub
fn
update_cache_hit_rates
(
&
self
,
host_rate
:
f32
,
disk_rate
:
f32
)
{
pub fn update_cache_hit_rates(&self, host_rate: f32, disk_rate: f32, object_rate: f32) {
    // Each gauge stores an f64, so every f32 rate is widened (losslessly)
    // before it is written to its matching gauge.
    let host = f64::from(host_rate);
    let disk = f64::from(disk_rate);
    let object = f64::from(object_rate);

    self.host_cache_hit_rate.set(host);
    self.disk_cache_hit_rate.set(disk);
    self.object_cache_hit_rate.set(object);
}
/// Count object storage reads that failed.
///
/// `num_blocks` is added to the `object_read_failures` counter, which is
/// tracked in blocks.
pub fn record_object_read_failure(&self, num_blocks: u64) {
    let read_failures = &self.object_read_failures;
    read_failures.inc_by(num_blocks);
}
/// Count object storage writes that failed.
///
/// `num_blocks` is added to the `object_write_failures` counter, which is
/// tracked in blocks.
pub fn record_object_write_failure(&self, num_blocks: u64) {
    let write_failures = &self.object_write_failures;
    write_failures.inc_by(num_blocks);
}
}
...
...
lib/runtime/src/config/environment_names.rs
View file @
096699c4
...
...
@@ -195,6 +195,37 @@ pub mod kvbm {
"DYN_KVBM_DISK_CACHE_OVERRIDE_NUM_BLOCKS"
;
}
/// Names of the environment variables that configure object storage.
pub mod object_storage {
    /// Switch for object storage; the value "1" turns it on.
    pub const DYN_KVBM_OBJECT_ENABLED: &str = "DYN_KVBM_OBJECT_ENABLED";

    /// Name of the bucket used for the object storage cache.
    ///
    /// A `{worker_id}` template is supported for per-worker buckets,
    /// for example "kv-cache-{worker_id}".
    pub const DYN_KVBM_OBJECT_BUCKET: &str = "DYN_KVBM_OBJECT_BUCKET";

    /// Object storage endpoint.
    pub const DYN_KVBM_OBJECT_ENDPOINT: &str = "DYN_KVBM_OBJECT_ENDPOINT";

    /// Object storage region.
    pub const DYN_KVBM_OBJECT_REGION: &str = "DYN_KVBM_OBJECT_REGION";

    /// Access key used for authentication.
    pub const DYN_KVBM_OBJECT_ACCESS_KEY: &str = "DYN_KVBM_OBJECT_ACCESS_KEY";

    /// Secret key used for authentication.
    pub const DYN_KVBM_OBJECT_SECRET_KEY: &str = "DYN_KVBM_OBJECT_SECRET_KEY";

    /// How many blocks to store in object storage.
    pub const DYN_KVBM_OBJECT_NUM_BLOCKS: &str = "DYN_KVBM_OBJECT_NUM_BLOCKS";
}
/// Names of the environment variables that configure transfers.
pub mod transfer {
    /// Upper bound on the number of blocks in one transfer batch.
    pub const DYN_KVBM_TRANSFER_BATCH_SIZE: &str = "DYN_KVBM_TRANSFER_BATCH_SIZE";
}
/// KVBM leader (distributed mode) configuration
pub
mod
leader
{
/// Timeout in seconds for KVBM leader and worker initialization
...
...
lib/runtime/src/metrics/prometheus_names.rs
View file @
096699c4
...
...
@@ -279,6 +279,27 @@ pub mod kvbm {
/// Hit rate of the disk cache over the sliding window, in the range 0.0-1.0.
pub const DISK_CACHE_HIT_RATE: &str = "disk_cache_hit_rate";

/// Hit rate of the object storage cache over the sliding window, in the range 0.0-1.0.
pub const OBJECT_CACHE_HIT_RATE: &str = "object_cache_hit_rate";

/// Count of blocks offloaded from device to object storage.
pub const OFFLOAD_BLOCKS_D2O: &str = "offload_blocks_d2o";

/// Count of blocks onboarded from object storage to device.
pub const ONBOARD_BLOCKS_O2D: &str = "onboard_blocks_o2d";

/// Bytes sent to object storage (offload direction).
pub const OFFLOAD_BYTES_OBJECT: &str = "offload_bytes_object";

/// Bytes fetched from object storage (onboard direction).
pub const ONBOARD_BYTES_OBJECT: &str = "onboard_bytes_object";

/// Count of object storage read operations that failed, measured in blocks.
pub const OBJECT_READ_FAILURES: &str = "object_read_failures";

/// Count of object storage write operations that failed, measured in blocks.
pub const OBJECT_WRITE_FAILURES: &str = "object_write_failures";
}
/// KvStats metrics from LLM workers
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment