Unverified commit 096699c4, authored by Patrick and committed by GitHub
Browse files

feat: KVBM Object Support - add runtime vars for object (#5063)


Signed-off-by: Patrick Riel <priel@nvidia.com>
parent 22199857
......@@ -214,7 +214,7 @@ impl<R: RequestKey> ConnectorSlotManager<R> {
// Update Prometheus metrics
let host_rate = cache_stats_clone.host_hit_rate();
let disk_rate = cache_stats_clone.disk_hit_rate();
kvbm_metrics_clone.update_cache_hit_rates(host_rate, disk_rate);
kvbm_metrics_clone.update_cache_hit_rates(host_rate, disk_rate, 0.0);
// Also log cache hit rates periodically
cache_stats_clone.maybe_log();
}
......
......@@ -4,8 +4,10 @@
use axum::Router;
use dynamo_runtime::metrics::prometheus_names::{
kvbm::{
DISK_CACHE_HIT_RATE, HOST_CACHE_HIT_RATE, MATCHED_TOKENS, OFFLOAD_BLOCKS_D2D,
OFFLOAD_BLOCKS_D2H, OFFLOAD_BLOCKS_H2D, ONBOARD_BLOCKS_D2D, ONBOARD_BLOCKS_H2D,
DISK_CACHE_HIT_RATE, HOST_CACHE_HIT_RATE, MATCHED_TOKENS, OBJECT_CACHE_HIT_RATE,
OBJECT_READ_FAILURES, OBJECT_WRITE_FAILURES, OFFLOAD_BLOCKS_D2D, OFFLOAD_BLOCKS_D2H,
OFFLOAD_BLOCKS_D2O, OFFLOAD_BLOCKS_H2D, ONBOARD_BLOCKS_D2D, ONBOARD_BLOCKS_H2D,
ONBOARD_BLOCKS_O2D,
},
sanitize_prometheus_name,
};
......@@ -26,12 +28,18 @@ pub struct KvbmMetrics {
// number of blocks offloaded from device to disk (bypassing host memory)
pub offload_blocks_d2d: IntCounter,
// number of blocks offloaded from device to object storage
pub offload_blocks_d2o: IntCounter,
// number of blocks onboarded from host to device
pub onboard_blocks_h2d: IntCounter,
// number of blocks onboarded from disk to device
pub onboard_blocks_d2d: IntCounter,
// number of blocks onboarded from object storage to device
pub onboard_blocks_o2d: IntCounter,
// number of matched tokens from KVBM
pub matched_tokens: IntCounter,
......@@ -41,6 +49,15 @@ pub struct KvbmMetrics {
// disk cache hit rate (0.0-1.0) from the sliding window
pub disk_cache_hit_rate: Gauge,
// object cache hit rate (0.0-1.0) from the sliding window
pub object_cache_hit_rate: Gauge,
// number of failed object storage read operations (blocks)
pub object_read_failures: IntCounter,
// number of failed object storage write operations (blocks)
pub object_write_failures: IntCounter,
shutdown_notify: Option<Arc<Notify>>,
}
......@@ -70,6 +87,13 @@ impl KvbmMetrics {
&[],
)
.unwrap();
let offload_blocks_d2o = mr
.create_intcounter(
OFFLOAD_BLOCKS_D2O,
"The number of offload blocks from device to object storage",
&[],
)
.unwrap();
let onboard_blocks_h2d = mr
.create_intcounter(
ONBOARD_BLOCKS_H2D,
......@@ -84,6 +108,14 @@ impl KvbmMetrics {
&[],
)
.unwrap();
let onboard_blocks_o2d = mr
.create_intcounter(
ONBOARD_BLOCKS_O2D,
"The number of onboard blocks from object storage to device",
&[],
)
.unwrap();
let matched_tokens = mr
.create_intcounter(MATCHED_TOKENS, "The number of matched tokens", &[])
.unwrap();
......@@ -101,18 +133,43 @@ impl KvbmMetrics {
&[],
)
.unwrap();
let object_cache_hit_rate = mr
.create_gauge(
OBJECT_CACHE_HIT_RATE,
"Object storage cache hit rate (0.0-1.0) from the sliding window",
&[],
)
.unwrap();
let object_read_failures = mr
.create_intcounter(
OBJECT_READ_FAILURES,
"The number of failed object storage read operations (blocks)",
&[],
)
.unwrap();
let object_write_failures = mr
.create_intcounter(
OBJECT_WRITE_FAILURES,
"The number of failed object storage write operations (blocks)",
&[],
)
.unwrap();
// early return if no endpoint is needed
if !create_endpoint {
return Self {
offload_blocks_d2h,
offload_blocks_h2d,
offload_blocks_d2d,
offload_blocks_d2o,
onboard_blocks_h2d,
onboard_blocks_d2d,
onboard_blocks_o2d,
matched_tokens,
host_cache_hit_rate,
disk_cache_hit_rate,
object_cache_hit_rate,
object_read_failures,
object_write_failures,
shutdown_notify: None,
};
}
......@@ -164,19 +221,34 @@ impl KvbmMetrics {
offload_blocks_d2h,
offload_blocks_h2d,
offload_blocks_d2d,
offload_blocks_d2o,
onboard_blocks_h2d,
onboard_blocks_d2d,
onboard_blocks_o2d,
matched_tokens,
host_cache_hit_rate,
disk_cache_hit_rate,
object_cache_hit_rate,
object_read_failures,
object_write_failures,
shutdown_notify: Some(notify),
}
}
/// Publish the latest cache hit rates to the host, disk, and object gauges.
///
/// Each `f32` rate is widened to `f64` and stored in the matching gauge
/// (`host_cache_hit_rate`, `disk_cache_hit_rate`, `object_cache_hit_rate`).
/// The values are stored as given; this method performs no clamping or validation.
/// The rates are presumably read from a CacheStatsTracker by the caller — confirm
/// at the call site.
// NOTE(review): the two-argument and three-argument signature lines both appear
// below because this text is a rendered diff (old line followed by new line);
// only the three-argument form matches the body.
pub fn update_cache_hit_rates(&self, host_rate: f32, disk_rate: f32) {
pub fn update_cache_hit_rates(&self, host_rate: f32, disk_rate: f32, object_rate: f32) {
self.host_cache_hit_rate.set(host_rate as f64);
self.disk_cache_hit_rate.set(disk_rate as f64);
// Third gauge introduced together with the `object_rate` parameter.
self.object_cache_hit_rate.set(object_rate as f64);
}
/// Count failed object storage reads.
///
/// Adds `num_blocks` to the `object_read_failures` counter.
pub fn record_object_read_failure(&self, num_blocks: u64) {
    let failed_reads = &self.object_read_failures;
    failed_reads.inc_by(num_blocks);
}
/// Count failed object storage writes.
///
/// Adds `num_blocks` to the `object_write_failures` counter.
pub fn record_object_write_failure(&self, num_blocks: u64) {
    let failed_writes = &self.object_write_failures;
    failed_writes.inc_by(num_blocks);
}
}
......
......@@ -195,6 +195,37 @@ pub mod kvbm {
"DYN_KVBM_DISK_CACHE_OVERRIDE_NUM_BLOCKS";
}
/// Object storage configuration
///
/// Each constant holds the name of a runtime variable; the string value is
/// identical to the constant's identifier. These are presumably environment
/// variable names — confirm against the code that reads them.
pub mod object_storage {
/// Enable object storage. Set to "1" to enable.
pub const DYN_KVBM_OBJECT_ENABLED: &str = "DYN_KVBM_OBJECT_ENABLED";
/// Bucket name for object storage cache
/// Supports `{worker_id}` template for per-worker buckets
/// Example: "kv-cache-{worker_id}"
pub const DYN_KVBM_OBJECT_BUCKET: &str = "DYN_KVBM_OBJECT_BUCKET";
/// Endpoint for object storage
pub const DYN_KVBM_OBJECT_ENDPOINT: &str = "DYN_KVBM_OBJECT_ENDPOINT";
/// Region for object storage
pub const DYN_KVBM_OBJECT_REGION: &str = "DYN_KVBM_OBJECT_REGION";
/// Access key for authentication
pub const DYN_KVBM_OBJECT_ACCESS_KEY: &str = "DYN_KVBM_OBJECT_ACCESS_KEY";
/// Secret key for authentication
// NOTE(review): this constant is only the variable's name, not the secret.
// Code that reads the variable should avoid logging its value — verify at the readers.
pub const DYN_KVBM_OBJECT_SECRET_KEY: &str = "DYN_KVBM_OBJECT_SECRET_KEY";
/// Number of blocks to store in object storage
pub const DYN_KVBM_OBJECT_NUM_BLOCKS: &str = "DYN_KVBM_OBJECT_NUM_BLOCKS";
}
/// Transfer configuration
///
/// Holds the name of the runtime variable that tunes block transfers; the string
/// value is identical to the constant's identifier.
pub mod transfer {
/// Maximum number of blocks per transfer batch
pub const DYN_KVBM_TRANSFER_BATCH_SIZE: &str = "DYN_KVBM_TRANSFER_BATCH_SIZE";
}
/// KVBM leader (distributed mode) configuration
pub mod leader {
/// Timeout in seconds for KVBM leader and worker initialization
......
......@@ -279,6 +279,27 @@ pub mod kvbm {
/// Disk cache hit rate (0.0-1.0) from the sliding window
pub const DISK_CACHE_HIT_RATE: &str = "disk_cache_hit_rate";
// --- Object storage metric names ---
// Each constant's value is the lowercase form of its identifier.
/// Object storage cache hit rate (0.0-1.0) from the sliding window
pub const OBJECT_CACHE_HIT_RATE: &str = "object_cache_hit_rate";
/// Number of blocks offloaded from device to object storage
pub const OFFLOAD_BLOCKS_D2O: &str = "offload_blocks_d2o";
/// Number of blocks onboarded from object storage to device
pub const ONBOARD_BLOCKS_O2D: &str = "onboard_blocks_o2d";
/// Bytes transferred to object storage (offload)
// NOTE(review): not present in the metrics import list visible in this change;
// it appears to be defined but not yet registered as a metric — confirm.
pub const OFFLOAD_BYTES_OBJECT: &str = "offload_bytes_object";
/// Bytes transferred from object storage (onboard)
// NOTE(review): same as OFFLOAD_BYTES_OBJECT — no visible consumer; confirm.
pub const ONBOARD_BYTES_OBJECT: &str = "onboard_bytes_object";
/// Number of failed object storage read operations (blocks)
pub const OBJECT_READ_FAILURES: &str = "object_read_failures";
/// Number of failed object storage write operations (blocks)
pub const OBJECT_WRITE_FAILURES: &str = "object_write_failures";
}
/// KvStats metrics from LLM workers
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment