Unverified commit b39382ba, authored by Ziqi Fan, committed by GitHub
Browse files

feat: add initial batch of KVBM metrics on match, offload and onboard (#2673)

parent 35055c6f
...@@ -19,9 +19,22 @@ ...@@ -19,9 +19,22 @@
"editable": true, "editable": true,
"fiscalYearStartMonth": 0, "fiscalYearStartMonth": 0,
"graphTooltip": 0, "graphTooltip": 0,
"id": 4, "id": 6,
"links": [], "links": [],
"panels": [ "panels": [
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 7,
"panels": [],
"title": "General",
"type": "row"
},
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
...@@ -85,9 +98,9 @@ ...@@ -85,9 +98,9 @@
"h": 8, "h": 8,
"w": 12, "w": 12,
"x": 0, "x": 0,
"y": 0 "y": 1
}, },
"id": 1, "id": 10,
"options": { "options": {
"legend": { "legend": {
"calcs": [], "calcs": [],
...@@ -106,7 +119,7 @@ ...@@ -106,7 +119,7 @@
{ {
"disableTextWrap": false, "disableTextWrap": false,
"editorMode": "builder", "editorMode": "builder",
"expr": "dynamo_component_save_kv_layer_requests{dynamo_namespace=\"kvbm_connector_worker\"}", "expr": "dynamo_component_matched_tokens{dynamo_namespace=\"kvbm_connector_leader\"}",
"fullMetaSearch": false, "fullMetaSearch": false,
"includeNullMetadata": true, "includeNullMetadata": true,
"legendFormat": "__auto", "legendFormat": "__auto",
...@@ -115,9 +128,22 @@ ...@@ -115,9 +128,22 @@
"useBackend": false "useBackend": false
} }
], ],
"title": "KVBM Worker: save kv layer requests", "title": "Matched Tokens",
"type": "timeseries" "type": "timeseries"
}, },
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 9
},
"id": 5,
"panels": [],
"title": "Offload",
"type": "row"
},
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
...@@ -181,7 +207,7 @@ ...@@ -181,7 +207,7 @@
"h": 8, "h": 8,
"w": 12, "w": 12,
"x": 0, "x": 0,
"y": 8 "y": 10
}, },
"id": 2, "id": 2,
"options": { "options": {
...@@ -211,7 +237,500 @@ ...@@ -211,7 +237,500 @@
"useBackend": false "useBackend": false
} }
], ],
"title": "KVBM Leader: offload requests", "title": "Offload Requests",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 10
},
"id": 3,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.0.1",
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "dynamo_component_offload_blocks_d2h{dynamo_namespace=\"kvbm_connector_leader\"}",
"fullMetaSearch": false,
"includeNullMetadata": true,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Offload Blocks",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 18
},
"id": 1,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.0.1",
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "dynamo_component_save_kv_layer_requests{dynamo_namespace=\"kvbm_connector_worker\"}",
"fullMetaSearch": false,
"includeNullMetadata": true,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Save KV Layer Requests",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 26
},
"id": 6,
"panels": [],
"title": "Onboard",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 27
},
"id": 9,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.0.1",
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "dynamo_component_onboard_requests{dynamo_namespace=\"kvbm_connector_leader\"}",
"fullMetaSearch": false,
"includeNullMetadata": true,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Onboard Requests",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 27
},
"id": 4,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.0.1",
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "dynamo_component_onboard_blocks_h2d{dynamo_namespace=\"kvbm_connector_leader\"}",
"fullMetaSearch": false,
"includeNullMetadata": true,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Onboard Blocks - Host to Device",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 35
},
"id": 8,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.0.1",
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "dynamo_component_onboard_blocks_d2d{dynamo_namespace=\"kvbm_connector_leader\"}",
"fullMetaSearch": false,
"includeNullMetadata": true,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Onboard Blocks - Disk to Device",
"type": "timeseries" "type": "timeseries"
} }
], ],
...@@ -223,12 +742,12 @@ ...@@ -223,12 +742,12 @@
"list": [] "list": []
}, },
"time": { "time": {
"from": "now-15m", "from": "now-30m",
"to": "now" "to": "now"
}, },
"timepicker": {}, "timepicker": {},
"timezone": "browser", "timezone": "browser",
"title": "KVBM Dashboard", "title": "KVBM Dashboard",
"uid": "3f679257-70a5-402c-92b4-05382337b548", "uid": "3f679257-70a5-402c-92b4-05382337b548",
"version": 7 "version": 5
} }
\ No newline at end of file
...@@ -80,6 +80,7 @@ pub struct KvConnectorLeader { ...@@ -80,6 +80,7 @@ pub struct KvConnectorLeader {
inflight_requests: HashSet<String>, inflight_requests: HashSet<String>,
onboarding_slots: HashSet<String>, onboarding_slots: HashSet<String>,
iteration_counter: u64, iteration_counter: u64,
kvbm_metrics: KvbmMetrics,
} }
impl KvConnectorLeader { impl KvConnectorLeader {
...@@ -114,12 +115,13 @@ impl KvConnectorLeader { ...@@ -114,12 +115,13 @@ impl KvConnectorLeader {
block_manager.clone(), block_manager.clone(),
leader, leader,
drt.clone(), drt.clone(),
kvbm_metrics, kvbm_metrics.clone(),
), ),
block_size, block_size,
inflight_requests: HashSet::new(), inflight_requests: HashSet::new(),
onboarding_slots: HashSet::new(), onboarding_slots: HashSet::new(),
iteration_counter: 0, iteration_counter: 0,
kvbm_metrics,
} }
} }
} }
...@@ -188,6 +190,9 @@ impl Leader for KvConnectorLeader { ...@@ -188,6 +190,9 @@ impl Leader for KvConnectorLeader {
"scheduling onboarding for {} external tokens", "scheduling onboarding for {} external tokens",
num_external_tokens num_external_tokens
); );
self.kvbm_metrics
.matched_tokens
.inc_by(num_external_tokens as u64);
Ok((num_external_tokens, true)) Ok((num_external_tokens, true))
} else { } else {
Ok((0, false)) Ok((0, false))
......
...@@ -124,12 +124,13 @@ impl KvConnectorLeaderRecorder { ...@@ -124,12 +124,13 @@ impl KvConnectorLeaderRecorder {
block_manager.clone(), block_manager.clone(),
leader, leader,
drt.clone(), drt.clone(),
kvbm_metrics, kvbm_metrics.clone(),
), ),
block_size, block_size,
inflight_requests: HashSet::new(), inflight_requests: HashSet::new(),
onboarding_slots: HashSet::new(), onboarding_slots: HashSet::new(),
iteration_counter: 0, iteration_counter: 0,
kvbm_metrics,
}; };
let (unbounded_tx, unbounded_rx) = mpsc::unbounded_channel(); let (unbounded_tx, unbounded_rx) = mpsc::unbounded_channel();
......
...@@ -197,7 +197,7 @@ impl<R: RequestKey> ConnectorSlotManager<R> { ...@@ -197,7 +197,7 @@ impl<R: RequestKey> ConnectorSlotManager<R> {
let xfer_engine_task = CriticalTaskExecutionHandle::new_with_runtime( let xfer_engine_task = CriticalTaskExecutionHandle::new_with_runtime(
|cancellation_token| async move { |cancellation_token| async move {
xfer_engine xfer_engine
.execute(cancellation_token, drt_for_task, kvbm_metrics.clone()) .execute(cancellation_token, drt_for_task, kvbm_metrics)
.await .await
}, },
primary_token, primary_token,
...@@ -1042,6 +1042,9 @@ impl LocalTransferEngine { ...@@ -1042,6 +1042,9 @@ impl LocalTransferEngine {
let leader_offload = Arc::clone(&self.leader); let leader_offload = Arc::clone(&self.leader);
let leader_onboard = Arc::clone(&self.leader); let leader_onboard = Arc::clone(&self.leader);
let kvbm_metrics_onboard = kvbm_metrics.clone();
let kvbm_metrics_offload = kvbm_metrics.clone();
let onboard_task = CriticalTaskExecutionHandle::new_with_runtime( let onboard_task = CriticalTaskExecutionHandle::new_with_runtime(
|cancellation_token_onboard| async move { |cancellation_token_onboard| async move {
while let Some(req) = onboard_rx.recv().await { while let Some(req) = onboard_rx.recv().await {
...@@ -1049,7 +1052,10 @@ impl LocalTransferEngine { ...@@ -1049,7 +1052,10 @@ impl LocalTransferEngine {
tracing::debug!("LocalOnboardTask: received cancellation signal"); tracing::debug!("LocalOnboardTask: received cancellation signal");
break; break;
} }
if let Err(e) = process_onboard_request(req, &leader_onboard).await { if let Err(e) =
process_onboard_request(req, &leader_onboard, kvbm_metrics_onboard.clone())
.await
{
tracing::error!("LocalOnboardTask: error processing request: {:?}", e); tracing::error!("LocalOnboardTask: error processing request: {:?}", e);
} }
} }
...@@ -1071,7 +1077,7 @@ impl LocalTransferEngine { ...@@ -1071,7 +1077,7 @@ impl LocalTransferEngine {
req, req,
&block_manager_offload, &block_manager_offload,
&leader_offload, &leader_offload,
kvbm_metrics.clone(), kvbm_metrics_offload.clone(),
) )
.await .await
{ {
...@@ -1145,6 +1151,9 @@ async fn process_offload_request( ...@@ -1145,6 +1151,9 @@ async fn process_offload_request(
kvbm_metrics: KvbmMetrics, kvbm_metrics: KvbmMetrics,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
kvbm_metrics.offload_requests.inc(); kvbm_metrics.offload_requests.inc();
kvbm_metrics
.offload_blocks_d2h
.inc_by(offload_req.block_ids.len() as u64);
let request_id = &offload_req.request_id; let request_id = &offload_req.request_id;
let operation_id = &offload_req.operation_id; let operation_id = &offload_req.operation_id;
...@@ -1154,7 +1163,6 @@ async fn process_offload_request( ...@@ -1154,7 +1163,6 @@ async fn process_offload_request(
offload_req.block_ids.len() offload_req.block_ids.len()
); );
// TODO: Implement actual offload logic
// 1. Acquire mutable host blocks // 1. Acquire mutable host blocks
let host_blocks = block_manager let host_blocks = block_manager
.host() .host()
...@@ -1250,7 +1258,19 @@ async fn process_offload_request( ...@@ -1250,7 +1258,19 @@ async fn process_offload_request(
async fn process_onboard_request( async fn process_onboard_request(
onboard_req: LocalOnboardRequest, onboard_req: LocalOnboardRequest,
leader: &Arc<KvbmLeader>, leader: &Arc<KvbmLeader>,
kvbm_metrics: KvbmMetrics,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
kvbm_metrics.onboard_requests.inc();
if onboard_req.src_blocks.storage_pool() == BlockTransferPool::Host {
kvbm_metrics
.onboard_blocks_h2d
.inc_by(onboard_req.src_blocks.len() as u64);
} else if onboard_req.src_blocks.storage_pool() == BlockTransferPool::Disk {
kvbm_metrics
.onboard_blocks_d2d
.inc_by(onboard_req.src_blocks.len() as u64);
}
let request_id = &onboard_req.request_id; let request_id = &onboard_req.request_id;
let operation_id = &onboard_req.operation_id; let operation_id = &onboard_req.operation_id;
......
...@@ -265,7 +265,6 @@ impl Worker for KvConnectorWorker { ...@@ -265,7 +265,6 @@ impl Worker for KvConnectorWorker {
/// Trigger layer-wise completion signals. /// Trigger layer-wise completion signals.
/// Trigger block-wise completion signals after last layer. /// Trigger block-wise completion signals after last layer.
fn save_kv_layer(&mut self, _layer_name: String) -> anyhow::Result<()> { fn save_kv_layer(&mut self, _layer_name: String) -> anyhow::Result<()> {
self.kvbm_metrics.save_kv_layer_requests.inc();
self.layers_complete += 1; self.layers_complete += 1;
if self.layers_complete == self.kv_cache_layers.len() { if self.layers_complete == self.kv_cache_layers.len() {
let offloading_operations = std::mem::take(&mut self.offloading_operations); let offloading_operations = std::mem::take(&mut self.offloading_operations);
...@@ -278,6 +277,7 @@ impl Worker for KvConnectorWorker { ...@@ -278,6 +277,7 @@ impl Worker for KvConnectorWorker {
self.connector.enqueue_request(operation); self.connector.enqueue_request(operation);
} }
} }
self.kvbm_metrics.save_kv_layer_requests.inc();
Ok(()) Ok(())
} }
......
...@@ -6,8 +6,26 @@ use prometheus::IntCounter; ...@@ -6,8 +6,26 @@ use prometheus::IntCounter;
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct KvbmMetrics { pub struct KvbmMetrics {
// number of offload requests
pub offload_requests: IntCounter, pub offload_requests: IntCounter,
// number of blocks offloaded from device to host
pub offload_blocks_d2h: IntCounter,
// number of onboard requests
pub onboard_requests: IntCounter,
// number of blocks onboarded from host to device
pub onboard_blocks_h2d: IntCounter,
// number of blocks onboarded from disk to device
pub onboard_blocks_d2d: IntCounter,
// number of save kv layer requests
pub save_kv_layer_requests: IntCounter, pub save_kv_layer_requests: IntCounter,
// number of matched tokens from KVBM
pub matched_tokens: IntCounter,
} }
impl KvbmMetrics { impl KvbmMetrics {
...@@ -15,6 +33,30 @@ impl KvbmMetrics { ...@@ -15,6 +33,30 @@ impl KvbmMetrics {
let offload_requests = mr let offload_requests = mr
.create_intcounter("offload_requests", "The number of offload requests", &[]) .create_intcounter("offload_requests", "The number of offload requests", &[])
.unwrap(); .unwrap();
let offload_blocks_d2h = mr
.create_intcounter(
"offload_blocks_d2h",
"The number of offload blocks from device to host",
&[],
)
.unwrap();
let onboard_requests = mr
.create_intcounter("onboard_requests", "The number of onboard requests", &[])
.unwrap();
let onboard_blocks_h2d = mr
.create_intcounter(
"onboard_blocks_h2d",
"The number of onboard blocks from host to device",
&[],
)
.unwrap();
let onboard_blocks_d2d = mr
.create_intcounter(
"onboard_blocks_d2d",
"The number of onboard blocks from disk to device",
&[],
)
.unwrap();
let save_kv_layer_requests = mr let save_kv_layer_requests = mr
.create_intcounter( .create_intcounter(
"save_kv_layer_requests", "save_kv_layer_requests",
...@@ -22,9 +64,17 @@ impl KvbmMetrics { ...@@ -22,9 +64,17 @@ impl KvbmMetrics {
&[], &[],
) )
.unwrap(); .unwrap();
let matched_tokens = mr
.create_intcounter("matched_tokens", "The number of matched tokens", &[])
.unwrap();
Self { Self {
offload_requests, offload_requests,
offload_blocks_d2h,
onboard_requests,
onboard_blocks_h2d,
onboard_blocks_d2d,
save_kv_layer_requests, save_kv_layer_requests,
matched_tokens,
} }
} }
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment