Unverified Commit b39382ba authored by Ziqi Fan's avatar Ziqi Fan Committed by GitHub
Browse files

feat: add initial batch of KVBM metrics on match, offload and onboard (#2673)

parent 35055c6f
{ {
"annotations": { "annotations": {
"list": [ "list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"description": "All KVBM related metrics",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 4,
"links": [],
"panels": [
{ {
"builtIn": 1,
"datasource": { "datasource": {
"type": "prometheus", "type": "grafana",
"uid": "P1809F7CD0C75ACF3" "uid": "-- Grafana --"
}, },
"fieldConfig": { "enable": true,
"defaults": { "hide": true,
"color": { "iconColor": "rgba(0, 211, 255, 1)",
"mode": "palette-classic" "name": "Annotations & Alerts",
}, "type": "dashboard"
"custom": { }
"axisBorderShow": false, ]
"axisCenteredZero": false, },
"axisColorMode": "text", "description": "All KVBM related metrics",
"axisLabel": "", "editable": true,
"axisPlacement": "auto", "fiscalYearStartMonth": 0,
"barAlignment": 0, "graphTooltip": 0,
"barWidthFactor": 0.6, "id": 6,
"drawStyle": "line", "links": [],
"fillOpacity": 0, "panels": [
"gradientMode": "none", {
"hideFrom": { "collapsed": false,
"legend": false, "gridPos": {
"tooltip": false, "h": 1,
"viz": false "w": 24,
}, "x": 0,
"insertNulls": false, "y": 0
"lineInterpolation": "linear", },
"lineWidth": 1, "id": 7,
"pointSize": 5, "panels": [],
"scaleDistribution": { "title": "General",
"type": "linear" "type": "row"
}, },
"showPoints": "auto", {
"spanNulls": false, "datasource": {
"stacking": { "type": "prometheus",
"group": "A", "uid": "P1809F7CD0C75ACF3"
"mode": "none" },
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
}, },
"thresholdsStyle": { {
"mode": "off" "color": "red",
"value": 80
} }
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 1
},
"id": 10,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.0.1",
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "dynamo_component_matched_tokens{dynamo_namespace=\"kvbm_connector_leader\"}",
"fullMetaSearch": false,
"includeNullMetadata": true,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Matched Tokens",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 9
},
"id": 5,
"panels": [],
"title": "Offload",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
}, },
"mappings": [], "showPoints": "auto",
"thresholds": { "spanNulls": false,
"mode": "absolute", "stacking": {
"steps": [ "group": "A",
{ "mode": "none"
"color": "green" },
}, "thresholdsStyle": {
{ "mode": "off"
"color": "red",
"value": 80
}
]
} }
}, },
"overrides": [] "mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
}
}, },
"gridPos": { "overrides": []
"h": 8, },
"w": 12, "gridPos": {
"x": 0, "h": 8,
"y": 0 "w": 12,
"x": 0,
"y": 10
},
"id": 2,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
}, },
"id": 1, "tooltip": {
"options": { "hideZeros": false,
"legend": { "mode": "single",
"calcs": [], "sort": "none"
"displayMode": "list", }
"placement": "bottom", },
"showLegend": true "pluginVersion": "12.0.1",
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "dynamo_component_offload_requests{dynamo_namespace=\"kvbm_connector_leader\"}",
"fullMetaSearch": false,
"includeNullMetadata": true,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Offload Requests",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
}, },
"tooltip": { "mappings": [],
"hideZeros": false, "thresholds": {
"mode": "single", "mode": "absolute",
"sort": "none" "steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
} }
}, },
"pluginVersion": "12.0.1", "overrides": []
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "dynamo_component_save_kv_layer_requests{dynamo_namespace=\"kvbm_connector_worker\"}",
"fullMetaSearch": false,
"includeNullMetadata": true,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "KVBM Worker: save kv layer requests",
"type": "timeseries"
}, },
{ "gridPos": {
"datasource": { "h": 8,
"type": "prometheus", "w": 12,
"uid": "P1809F7CD0C75ACF3" "x": 12,
"y": 10
},
"id": 3,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
}, },
"fieldConfig": { "tooltip": {
"defaults": { "hideZeros": false,
"color": { "mode": "single",
"mode": "palette-classic" "sort": "none"
}, }
"custom": { },
"axisBorderShow": false, "pluginVersion": "12.0.1",
"axisCenteredZero": false, "targets": [
"axisColorMode": "text", {
"axisLabel": "", "disableTextWrap": false,
"axisPlacement": "auto", "editorMode": "builder",
"barAlignment": 0, "expr": "dynamo_component_offload_blocks_d2h{dynamo_namespace=\"kvbm_connector_leader\"}",
"barWidthFactor": 0.6, "fullMetaSearch": false,
"drawStyle": "line", "includeNullMetadata": true,
"fillOpacity": 0, "legendFormat": "__auto",
"gradientMode": "none", "range": true,
"hideFrom": { "refId": "A",
"legend": false, "useBackend": false
"tooltip": false, }
"viz": false ],
}, "title": "Offload Blocks",
"insertNulls": false, "type": "timeseries"
"lineInterpolation": "linear", },
"lineWidth": 1, {
"pointSize": 5, "datasource": {
"scaleDistribution": { "type": "prometheus",
"type": "linear" "uid": "P1809F7CD0C75ACF3"
}, },
"showPoints": "auto", "fieldConfig": {
"spanNulls": false, "defaults": {
"stacking": { "color": {
"group": "A", "mode": "palette-classic"
"mode": "none" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
}, },
"thresholdsStyle": { {
"mode": "off" "color": "red",
"value": 80
} }
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 18
},
"id": 1,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.0.1",
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "dynamo_component_save_kv_layer_requests{dynamo_namespace=\"kvbm_connector_worker\"}",
"fullMetaSearch": false,
"includeNullMetadata": true,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Save KV Layer Requests",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 26
},
"id": 6,
"panels": [],
"title": "Onboard",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
}, },
"mappings": [], "insertNulls": false,
"thresholds": { "lineInterpolation": "linear",
"mode": "absolute", "lineWidth": 1,
"steps": [ "pointSize": 5,
{ "scaleDistribution": {
"color": "green" "type": "linear"
}, },
{ "showPoints": "auto",
"color": "red", "spanNulls": false,
"value": 80 "stacking": {
} "group": "A",
] "mode": "none"
},
"thresholdsStyle": {
"mode": "off"
} }
}, },
"overrides": [] "mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
}
}, },
"gridPos": { "overrides": []
"h": 8, },
"w": 12, "gridPos": {
"x": 0, "h": 8,
"y": 8 "w": 12,
"x": 0,
"y": 27
},
"id": 9,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
}, },
"id": 2, "tooltip": {
"options": { "hideZeros": false,
"legend": { "mode": "single",
"calcs": [], "sort": "none"
"displayMode": "list", }
"placement": "bottom", },
"showLegend": true "pluginVersion": "12.0.1",
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "dynamo_component_onboard_requests{dynamo_namespace=\"kvbm_connector_leader\"}",
"fullMetaSearch": false,
"includeNullMetadata": true,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Onboard Requests",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
}, },
"tooltip": { "custom": {
"hideZeros": false, "axisBorderShow": false,
"mode": "single", "axisCenteredZero": false,
"sort": "none" "axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
} }
}, },
"pluginVersion": "12.0.1", "overrides": []
"targets": [ },
{ "gridPos": {
"disableTextWrap": false, "h": 8,
"editorMode": "builder", "w": 12,
"expr": "dynamo_component_offload_requests{dynamo_namespace=\"kvbm_connector_leader\"}", "x": 12,
"fullMetaSearch": false, "y": 27
"includeNullMetadata": true, },
"legendFormat": "__auto", "id": 4,
"range": true, "options": {
"refId": "A", "legend": {
"useBackend": false "calcs": [],
} "displayMode": "list",
], "placement": "bottom",
"title": "KVBM Leader: offload requests", "showLegend": true
"type": "timeseries" },
} "tooltip": {
], "hideZeros": false,
"preload": false, "mode": "single",
"refresh": "auto", "sort": "none"
"schemaVersion": 41, }
"tags": [], },
"templating": { "pluginVersion": "12.0.1",
"list": [] "targets": [
}, {
"time": { "disableTextWrap": false,
"from": "now-15m", "editorMode": "builder",
"to": "now" "expr": "dynamo_component_onboard_blocks_h2d{dynamo_namespace=\"kvbm_connector_leader\"}",
"fullMetaSearch": false,
"includeNullMetadata": true,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Onboard Blocks - Host to Device",
"type": "timeseries"
}, },
"timepicker": {}, {
"timezone": "browser", "datasource": {
"title": "KVBM Dashboard", "type": "prometheus",
"uid": "3f679257-70a5-402c-92b4-05382337b548", "uid": "P1809F7CD0C75ACF3"
"version": 7 },
} "fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 35
},
"id": 8,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.0.1",
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "dynamo_component_onboard_blocks_d2d{dynamo_namespace=\"kvbm_connector_leader\"}",
"fullMetaSearch": false,
"includeNullMetadata": true,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Onboard Blocks - Disk to Device",
"type": "timeseries"
}
],
"preload": false,
"refresh": "auto",
"schemaVersion": 41,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-30m",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"title": "KVBM Dashboard",
"uid": "3f679257-70a5-402c-92b4-05382337b548",
"version": 5
}
\ No newline at end of file
...@@ -80,6 +80,7 @@ pub struct KvConnectorLeader { ...@@ -80,6 +80,7 @@ pub struct KvConnectorLeader {
inflight_requests: HashSet<String>, inflight_requests: HashSet<String>,
onboarding_slots: HashSet<String>, onboarding_slots: HashSet<String>,
iteration_counter: u64, iteration_counter: u64,
kvbm_metrics: KvbmMetrics,
} }
impl KvConnectorLeader { impl KvConnectorLeader {
...@@ -114,12 +115,13 @@ impl KvConnectorLeader { ...@@ -114,12 +115,13 @@ impl KvConnectorLeader {
block_manager.clone(), block_manager.clone(),
leader, leader,
drt.clone(), drt.clone(),
kvbm_metrics, kvbm_metrics.clone(),
), ),
block_size, block_size,
inflight_requests: HashSet::new(), inflight_requests: HashSet::new(),
onboarding_slots: HashSet::new(), onboarding_slots: HashSet::new(),
iteration_counter: 0, iteration_counter: 0,
kvbm_metrics,
} }
} }
} }
...@@ -188,6 +190,9 @@ impl Leader for KvConnectorLeader { ...@@ -188,6 +190,9 @@ impl Leader for KvConnectorLeader {
"scheduling onboarding for {} external tokens", "scheduling onboarding for {} external tokens",
num_external_tokens num_external_tokens
); );
self.kvbm_metrics
.matched_tokens
.inc_by(num_external_tokens as u64);
Ok((num_external_tokens, true)) Ok((num_external_tokens, true))
} else { } else {
Ok((0, false)) Ok((0, false))
......
...@@ -124,12 +124,13 @@ impl KvConnectorLeaderRecorder { ...@@ -124,12 +124,13 @@ impl KvConnectorLeaderRecorder {
block_manager.clone(), block_manager.clone(),
leader, leader,
drt.clone(), drt.clone(),
kvbm_metrics, kvbm_metrics.clone(),
), ),
block_size, block_size,
inflight_requests: HashSet::new(), inflight_requests: HashSet::new(),
onboarding_slots: HashSet::new(), onboarding_slots: HashSet::new(),
iteration_counter: 0, iteration_counter: 0,
kvbm_metrics,
}; };
let (unbounded_tx, unbounded_rx) = mpsc::unbounded_channel(); let (unbounded_tx, unbounded_rx) = mpsc::unbounded_channel();
......
...@@ -197,7 +197,7 @@ impl<R: RequestKey> ConnectorSlotManager<R> { ...@@ -197,7 +197,7 @@ impl<R: RequestKey> ConnectorSlotManager<R> {
let xfer_engine_task = CriticalTaskExecutionHandle::new_with_runtime( let xfer_engine_task = CriticalTaskExecutionHandle::new_with_runtime(
|cancellation_token| async move { |cancellation_token| async move {
xfer_engine xfer_engine
.execute(cancellation_token, drt_for_task, kvbm_metrics.clone()) .execute(cancellation_token, drt_for_task, kvbm_metrics)
.await .await
}, },
primary_token, primary_token,
...@@ -1042,6 +1042,9 @@ impl LocalTransferEngine { ...@@ -1042,6 +1042,9 @@ impl LocalTransferEngine {
let leader_offload = Arc::clone(&self.leader); let leader_offload = Arc::clone(&self.leader);
let leader_onboard = Arc::clone(&self.leader); let leader_onboard = Arc::clone(&self.leader);
let kvbm_metrics_onboard = kvbm_metrics.clone();
let kvbm_metrics_offload = kvbm_metrics.clone();
let onboard_task = CriticalTaskExecutionHandle::new_with_runtime( let onboard_task = CriticalTaskExecutionHandle::new_with_runtime(
|cancellation_token_onboard| async move { |cancellation_token_onboard| async move {
while let Some(req) = onboard_rx.recv().await { while let Some(req) = onboard_rx.recv().await {
...@@ -1049,7 +1052,10 @@ impl LocalTransferEngine { ...@@ -1049,7 +1052,10 @@ impl LocalTransferEngine {
tracing::debug!("LocalOnboardTask: received cancellation signal"); tracing::debug!("LocalOnboardTask: received cancellation signal");
break; break;
} }
if let Err(e) = process_onboard_request(req, &leader_onboard).await { if let Err(e) =
process_onboard_request(req, &leader_onboard, kvbm_metrics_onboard.clone())
.await
{
tracing::error!("LocalOnboardTask: error processing request: {:?}", e); tracing::error!("LocalOnboardTask: error processing request: {:?}", e);
} }
} }
...@@ -1071,7 +1077,7 @@ impl LocalTransferEngine { ...@@ -1071,7 +1077,7 @@ impl LocalTransferEngine {
req, req,
&block_manager_offload, &block_manager_offload,
&leader_offload, &leader_offload,
kvbm_metrics.clone(), kvbm_metrics_offload.clone(),
) )
.await .await
{ {
...@@ -1145,6 +1151,9 @@ async fn process_offload_request( ...@@ -1145,6 +1151,9 @@ async fn process_offload_request(
kvbm_metrics: KvbmMetrics, kvbm_metrics: KvbmMetrics,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
kvbm_metrics.offload_requests.inc(); kvbm_metrics.offload_requests.inc();
kvbm_metrics
.offload_blocks_d2h
.inc_by(offload_req.block_ids.len() as u64);
let request_id = &offload_req.request_id; let request_id = &offload_req.request_id;
let operation_id = &offload_req.operation_id; let operation_id = &offload_req.operation_id;
...@@ -1154,7 +1163,6 @@ async fn process_offload_request( ...@@ -1154,7 +1163,6 @@ async fn process_offload_request(
offload_req.block_ids.len() offload_req.block_ids.len()
); );
// TODO: Implement actual offload logic
// 1. Acquire mutable host blocks // 1. Acquire mutable host blocks
let host_blocks = block_manager let host_blocks = block_manager
.host() .host()
...@@ -1250,7 +1258,19 @@ async fn process_offload_request( ...@@ -1250,7 +1258,19 @@ async fn process_offload_request(
async fn process_onboard_request( async fn process_onboard_request(
onboard_req: LocalOnboardRequest, onboard_req: LocalOnboardRequest,
leader: &Arc<KvbmLeader>, leader: &Arc<KvbmLeader>,
kvbm_metrics: KvbmMetrics,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
kvbm_metrics.onboard_requests.inc();
if onboard_req.src_blocks.storage_pool() == BlockTransferPool::Host {
kvbm_metrics
.onboard_blocks_h2d
.inc_by(onboard_req.src_blocks.len() as u64);
} else if onboard_req.src_blocks.storage_pool() == BlockTransferPool::Disk {
kvbm_metrics
.onboard_blocks_d2d
.inc_by(onboard_req.src_blocks.len() as u64);
}
let request_id = &onboard_req.request_id; let request_id = &onboard_req.request_id;
let operation_id = &onboard_req.operation_id; let operation_id = &onboard_req.operation_id;
......
...@@ -265,7 +265,6 @@ impl Worker for KvConnectorWorker { ...@@ -265,7 +265,6 @@ impl Worker for KvConnectorWorker {
/// Trigger layer-wise completion signals. /// Trigger layer-wise completion signals.
/// Trigger block-wise completion signals after last layer. /// Trigger block-wise completion signals after last layer.
fn save_kv_layer(&mut self, _layer_name: String) -> anyhow::Result<()> { fn save_kv_layer(&mut self, _layer_name: String) -> anyhow::Result<()> {
self.kvbm_metrics.save_kv_layer_requests.inc();
self.layers_complete += 1; self.layers_complete += 1;
if self.layers_complete == self.kv_cache_layers.len() { if self.layers_complete == self.kv_cache_layers.len() {
let offloading_operations = std::mem::take(&mut self.offloading_operations); let offloading_operations = std::mem::take(&mut self.offloading_operations);
...@@ -278,6 +277,7 @@ impl Worker for KvConnectorWorker { ...@@ -278,6 +277,7 @@ impl Worker for KvConnectorWorker {
self.connector.enqueue_request(operation); self.connector.enqueue_request(operation);
} }
} }
self.kvbm_metrics.save_kv_layer_requests.inc();
Ok(()) Ok(())
} }
......
...@@ -6,8 +6,26 @@ use prometheus::IntCounter; ...@@ -6,8 +6,26 @@ use prometheus::IntCounter;
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct KvbmMetrics { pub struct KvbmMetrics {
// number of offload requests
pub offload_requests: IntCounter, pub offload_requests: IntCounter,
// number of blocks offloaded from device to host
pub offload_blocks_d2h: IntCounter,
// number of onboard requests
pub onboard_requests: IntCounter,
// number of blocks onboarded from host to device
pub onboard_blocks_h2d: IntCounter,
// number of blocks onboarded from disk to device
pub onboard_blocks_d2d: IntCounter,
// number of save kv layer requests
pub save_kv_layer_requests: IntCounter, pub save_kv_layer_requests: IntCounter,
// number of matched tokens from KVBM
pub matched_tokens: IntCounter,
} }
impl KvbmMetrics { impl KvbmMetrics {
...@@ -15,6 +33,30 @@ impl KvbmMetrics { ...@@ -15,6 +33,30 @@ impl KvbmMetrics {
let offload_requests = mr let offload_requests = mr
.create_intcounter("offload_requests", "The number of offload requests", &[]) .create_intcounter("offload_requests", "The number of offload requests", &[])
.unwrap(); .unwrap();
let offload_blocks_d2h = mr
.create_intcounter(
"offload_blocks_d2h",
"The number of offload blocks from device to host",
&[],
)
.unwrap();
let onboard_requests = mr
.create_intcounter("onboard_requests", "The number of onboard requests", &[])
.unwrap();
let onboard_blocks_h2d = mr
.create_intcounter(
"onboard_blocks_h2d",
"The number of onboard blocks from host to device",
&[],
)
.unwrap();
let onboard_blocks_d2d = mr
.create_intcounter(
"onboard_blocks_d2d",
"The number of onboard blocks from disk to device",
&[],
)
.unwrap();
let save_kv_layer_requests = mr let save_kv_layer_requests = mr
.create_intcounter( .create_intcounter(
"save_kv_layer_requests", "save_kv_layer_requests",
...@@ -22,9 +64,17 @@ impl KvbmMetrics { ...@@ -22,9 +64,17 @@ impl KvbmMetrics {
&[], &[],
) )
.unwrap(); .unwrap();
let matched_tokens = mr
.create_intcounter("matched_tokens", "The number of matched tokens", &[])
.unwrap();
Self { Self {
offload_requests, offload_requests,
offload_blocks_d2h,
onboard_requests,
onboard_blocks_h2d,
onboard_blocks_d2d,
save_kv_layer_requests, save_kv_layer_requests,
matched_tokens,
} }
} }
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment