Unverified commit 096699c4, authored by Patrick and committed by GitHub
Browse files

feat: KVBM Object Support - add runtime vars for object (#5063)


Signed-off-by: Patrick Riel <priel@nvidia.com>
parent 22199857
...@@ -214,7 +214,7 @@ impl<R: RequestKey> ConnectorSlotManager<R> { ...@@ -214,7 +214,7 @@ impl<R: RequestKey> ConnectorSlotManager<R> {
// Update Prometheus metrics // Update Prometheus metrics
let host_rate = cache_stats_clone.host_hit_rate(); let host_rate = cache_stats_clone.host_hit_rate();
let disk_rate = cache_stats_clone.disk_hit_rate(); let disk_rate = cache_stats_clone.disk_hit_rate();
kvbm_metrics_clone.update_cache_hit_rates(host_rate, disk_rate); kvbm_metrics_clone.update_cache_hit_rates(host_rate, disk_rate, 0.0);
// Also log cache hit rates periodically // Also log cache hit rates periodically
cache_stats_clone.maybe_log(); cache_stats_clone.maybe_log();
} }
......
...@@ -4,8 +4,10 @@ ...@@ -4,8 +4,10 @@
use axum::Router; use axum::Router;
use dynamo_runtime::metrics::prometheus_names::{ use dynamo_runtime::metrics::prometheus_names::{
kvbm::{ kvbm::{
DISK_CACHE_HIT_RATE, HOST_CACHE_HIT_RATE, MATCHED_TOKENS, OFFLOAD_BLOCKS_D2D, DISK_CACHE_HIT_RATE, HOST_CACHE_HIT_RATE, MATCHED_TOKENS, OBJECT_CACHE_HIT_RATE,
OFFLOAD_BLOCKS_D2H, OFFLOAD_BLOCKS_H2D, ONBOARD_BLOCKS_D2D, ONBOARD_BLOCKS_H2D, OBJECT_READ_FAILURES, OBJECT_WRITE_FAILURES, OFFLOAD_BLOCKS_D2D, OFFLOAD_BLOCKS_D2H,
OFFLOAD_BLOCKS_D2O, OFFLOAD_BLOCKS_H2D, ONBOARD_BLOCKS_D2D, ONBOARD_BLOCKS_H2D,
ONBOARD_BLOCKS_O2D,
}, },
sanitize_prometheus_name, sanitize_prometheus_name,
}; };
...@@ -26,12 +28,18 @@ pub struct KvbmMetrics { ...@@ -26,12 +28,18 @@ pub struct KvbmMetrics {
// number of blocks offloaded from device to disk (bypassing host memory) // number of blocks offloaded from device to disk (bypassing host memory)
pub offload_blocks_d2d: IntCounter, pub offload_blocks_d2d: IntCounter,
// number of blocks offloaded from device to object storage
pub offload_blocks_d2o: IntCounter,
// number of blocks onboarded from host to device // number of blocks onboarded from host to device
pub onboard_blocks_h2d: IntCounter, pub onboard_blocks_h2d: IntCounter,
// number of blocks onboarded from disk to device // number of blocks onboarded from disk to device
pub onboard_blocks_d2d: IntCounter, pub onboard_blocks_d2d: IntCounter,
// number of blocks onboarded from object storage to device
pub onboard_blocks_o2d: IntCounter,
// number of matched tokens from KVBM // number of matched tokens from KVBM
pub matched_tokens: IntCounter, pub matched_tokens: IntCounter,
...@@ -41,6 +49,15 @@ pub struct KvbmMetrics { ...@@ -41,6 +49,15 @@ pub struct KvbmMetrics {
// disk cache hit rate (0.0-1.0) from the sliding window // disk cache hit rate (0.0-1.0) from the sliding window
pub disk_cache_hit_rate: Gauge, pub disk_cache_hit_rate: Gauge,
// object cache hit rate (0.0-1.0) from the sliding window
pub object_cache_hit_rate: Gauge,
// number of failed object storage read operations (blocks)
pub object_read_failures: IntCounter,
// number of failed object storage write operations (blocks)
pub object_write_failures: IntCounter,
shutdown_notify: Option<Arc<Notify>>, shutdown_notify: Option<Arc<Notify>>,
} }
...@@ -70,6 +87,13 @@ impl KvbmMetrics { ...@@ -70,6 +87,13 @@ impl KvbmMetrics {
&[], &[],
) )
.unwrap(); .unwrap();
let offload_blocks_d2o = mr
.create_intcounter(
OFFLOAD_BLOCKS_D2O,
"The number of offload blocks from device to object storage",
&[],
)
.unwrap();
let onboard_blocks_h2d = mr let onboard_blocks_h2d = mr
.create_intcounter( .create_intcounter(
ONBOARD_BLOCKS_H2D, ONBOARD_BLOCKS_H2D,
...@@ -84,6 +108,14 @@ impl KvbmMetrics { ...@@ -84,6 +108,14 @@ impl KvbmMetrics {
&[], &[],
) )
.unwrap(); .unwrap();
let onboard_blocks_o2d = mr
.create_intcounter(
ONBOARD_BLOCKS_O2D,
"The number of onboard blocks from object storage to device",
&[],
)
.unwrap();
let matched_tokens = mr let matched_tokens = mr
.create_intcounter(MATCHED_TOKENS, "The number of matched tokens", &[]) .create_intcounter(MATCHED_TOKENS, "The number of matched tokens", &[])
.unwrap(); .unwrap();
...@@ -101,18 +133,43 @@ impl KvbmMetrics { ...@@ -101,18 +133,43 @@ impl KvbmMetrics {
&[], &[],
) )
.unwrap(); .unwrap();
let object_cache_hit_rate = mr
.create_gauge(
OBJECT_CACHE_HIT_RATE,
"Object storage cache hit rate (0.0-1.0) from the sliding window",
&[],
)
.unwrap();
let object_read_failures = mr
.create_intcounter(
OBJECT_READ_FAILURES,
"The number of failed object storage read operations (blocks)",
&[],
)
.unwrap();
let object_write_failures = mr
.create_intcounter(
OBJECT_WRITE_FAILURES,
"The number of failed object storage write operations (blocks)",
&[],
)
.unwrap();
// early return if no endpoint is needed // early return if no endpoint is needed
if !create_endpoint { if !create_endpoint {
return Self { return Self {
offload_blocks_d2h, offload_blocks_d2h,
offload_blocks_h2d, offload_blocks_h2d,
offload_blocks_d2d, offload_blocks_d2d,
offload_blocks_d2o,
onboard_blocks_h2d, onboard_blocks_h2d,
onboard_blocks_d2d, onboard_blocks_d2d,
onboard_blocks_o2d,
matched_tokens, matched_tokens,
host_cache_hit_rate, host_cache_hit_rate,
disk_cache_hit_rate, disk_cache_hit_rate,
object_cache_hit_rate,
object_read_failures,
object_write_failures,
shutdown_notify: None, shutdown_notify: None,
}; };
} }
...@@ -164,19 +221,34 @@ impl KvbmMetrics { ...@@ -164,19 +221,34 @@ impl KvbmMetrics {
offload_blocks_d2h, offload_blocks_d2h,
offload_blocks_h2d, offload_blocks_h2d,
offload_blocks_d2d, offload_blocks_d2d,
offload_blocks_d2o,
onboard_blocks_h2d, onboard_blocks_h2d,
onboard_blocks_d2d, onboard_blocks_d2d,
onboard_blocks_o2d,
matched_tokens, matched_tokens,
host_cache_hit_rate, host_cache_hit_rate,
disk_cache_hit_rate, disk_cache_hit_rate,
object_cache_hit_rate,
object_read_failures,
object_write_failures,
shutdown_notify: Some(notify), shutdown_notify: Some(notify),
} }
} }
/// Update cache hit rate metrics from a CacheStatsTracker.
///
/// Each rate arrives as an `f32`, is widened to `f64`, and is written to the
/// matching gauge: `host_cache_hit_rate`, `disk_cache_hit_rate`, and
/// `object_cache_hit_rate`.
pub fn update_cache_hit_rates(&self, host_rate: f32, disk_rate: f32) { pub fn update_cache_hit_rates(&self, host_rate: f32, disk_rate: f32, object_rate: f32) {
self.host_cache_hit_rate.set(host_rate as f64); self.host_cache_hit_rate.set(host_rate as f64);
self.disk_cache_hit_rate.set(disk_rate as f64); self.disk_cache_hit_rate.set(disk_rate as f64);
self.object_cache_hit_rate.set(object_rate as f64);
}
/// Count blocks whose read from object storage failed.
///
/// Adds `num_blocks` to the `object_read_failures` counter.
pub fn record_object_read_failure(&self, num_blocks: u64) {
    let failed_reads = &self.object_read_failures;
    failed_reads.inc_by(num_blocks);
}
/// Record failed object storage write operations.
///
/// Adds `num_blocks` to the `object_write_failures` counter.
pub fn record_object_write_failure(&self, num_blocks: u64) {
self.object_write_failures.inc_by(num_blocks);
} }
} }
......
...@@ -195,6 +195,37 @@ pub mod kvbm { ...@@ -195,6 +195,37 @@ pub mod kvbm {
"DYN_KVBM_DISK_CACHE_OVERRIDE_NUM_BLOCKS"; "DYN_KVBM_DISK_CACHE_OVERRIDE_NUM_BLOCKS";
} }
/// Names of the object storage settings, grouped by purpose
pub mod object_storage {
    // --- Activation ---

    /// Switch that turns object storage on; the value "1" enables it.
    pub const DYN_KVBM_OBJECT_ENABLED: &str = "DYN_KVBM_OBJECT_ENABLED";

    // --- Capacity ---

    /// How many blocks are to be stored in object storage
    pub const DYN_KVBM_OBJECT_NUM_BLOCKS: &str = "DYN_KVBM_OBJECT_NUM_BLOCKS";

    // --- Location ---

    /// Name of the bucket used for the object storage cache.
    /// A `{worker_id}` template is supported so each worker can get its own
    /// bucket, for example "kv-cache-{worker_id}".
    pub const DYN_KVBM_OBJECT_BUCKET: &str = "DYN_KVBM_OBJECT_BUCKET";

    /// Object storage endpoint
    pub const DYN_KVBM_OBJECT_ENDPOINT: &str = "DYN_KVBM_OBJECT_ENDPOINT";

    /// Object storage region
    pub const DYN_KVBM_OBJECT_REGION: &str = "DYN_KVBM_OBJECT_REGION";

    // --- Credentials ---

    /// Access key used for authentication
    pub const DYN_KVBM_OBJECT_ACCESS_KEY: &str = "DYN_KVBM_OBJECT_ACCESS_KEY";

    /// Secret key used for authentication
    pub const DYN_KVBM_OBJECT_SECRET_KEY: &str = "DYN_KVBM_OBJECT_SECRET_KEY";
}
/// Transfer configuration
///
/// Holds the names of settings that control block transfers.
pub mod transfer {
/// Maximum number of blocks per transfer batch.
/// NOTE(review): the default and valid range are not visible here — confirm
/// against the code that reads this setting.
pub const DYN_KVBM_TRANSFER_BATCH_SIZE: &str = "DYN_KVBM_TRANSFER_BATCH_SIZE";
}
/// KVBM leader (distributed mode) configuration /// KVBM leader (distributed mode) configuration
pub mod leader { pub mod leader {
/// Timeout in seconds for KVBM leader and worker initialization /// Timeout in seconds for KVBM leader and worker initialization
......
...@@ -279,6 +279,27 @@ pub mod kvbm { ...@@ -279,6 +279,27 @@ pub mod kvbm {
/// Disk cache hit rate (0.0-1.0) from the sliding window /// Disk cache hit rate (0.0-1.0) from the sliding window
pub const DISK_CACHE_HIT_RATE: &str = "disk_cache_hit_rate"; pub const DISK_CACHE_HIT_RATE: &str = "disk_cache_hit_rate";
/// Object storage cache hit rate (0.0-1.0) from the sliding window
pub const OBJECT_CACHE_HIT_RATE: &str = "object_cache_hit_rate";
/// Number of blocks offloaded from device to object storage
pub const OFFLOAD_BLOCKS_D2O: &str = "offload_blocks_d2o";
/// Number of blocks onboarded from object storage to device
pub const ONBOARD_BLOCKS_O2D: &str = "onboard_blocks_o2d";
/// Bytes transferred to object storage (offload)
// NOTE(review): this name is not in the `use` list or metric fields visible in
// this change — confirm a consumer exists or is planned.
pub const OFFLOAD_BYTES_OBJECT: &str = "offload_bytes_object";
/// Bytes transferred from object storage (onboard)
// NOTE(review): this name is not in the `use` list or metric fields visible in
// this change — confirm a consumer exists or is planned.
pub const ONBOARD_BYTES_OBJECT: &str = "onboard_bytes_object";
/// Number of failed object storage read operations (counted in blocks)
pub const OBJECT_READ_FAILURES: &str = "object_read_failures";
/// Number of failed object storage write operations (counted in blocks)
pub const OBJECT_WRITE_FAILURES: &str = "object_write_failures";
} }
/// KvStats metrics from LLM workers /// KvStats metrics from LLM workers
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment