Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
b39382ba
"lib/llm/vscode:/vscode.git/clone" did not exist on "e3346dabb06d1004d46a5fea92f6b86bf2d966e4"
Unverified
Commit
b39382ba
authored
Aug 25, 2025
by
Ziqi Fan
Committed by
GitHub
Aug 25, 2025
Browse files
feat: add initial batch of KVBM metrics on match, offload and onboard (#2673)
parent
35055c6f
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
809 additions
and
214 deletions
+809
-214
deploy/metrics/grafana_dashboards/grafana-kvbm-dashboard.json
...oy/metrics/grafana_dashboards/grafana-kvbm-dashboard.json
+726
-207
lib/bindings/python/rust/llm/block_manager/vllm/connector/leader.rs
...gs/python/rust/llm/block_manager/vllm/connector/leader.rs
+6
-1
lib/bindings/python/rust/llm/block_manager/vllm/connector/leader/recorder.rs
.../rust/llm/block_manager/vllm/connector/leader/recorder.rs
+2
-1
lib/bindings/python/rust/llm/block_manager/vllm/connector/leader/slot.rs
...thon/rust/llm/block_manager/vllm/connector/leader/slot.rs
+24
-4
lib/bindings/python/rust/llm/block_manager/vllm/connector/worker.rs
...gs/python/rust/llm/block_manager/vllm/connector/worker.rs
+1
-1
lib/llm/src/block_manager/metrics_kvbm.rs
lib/llm/src/block_manager/metrics_kvbm.rs
+50
-0
No files found.
deploy/metrics/grafana_dashboards/grafana-kvbm-dashboard.json
View file @
b39382ba
...
...
@@ -19,9 +19,22 @@
"editable"
:
true
,
"fiscalYearStartMonth"
:
0
,
"graphTooltip"
:
0
,
"id"
:
4
,
"id"
:
6
,
"links"
:
[],
"panels"
:
[
{
"collapsed"
:
false
,
"gridPos"
:
{
"h"
:
1
,
"w"
:
24
,
"x"
:
0
,
"y"
:
0
},
"id"
:
7
,
"panels"
:
[],
"title"
:
"General"
,
"type"
:
"row"
},
{
"datasource"
:
{
"type"
:
"prometheus"
,
...
...
@@ -85,9 +98,9 @@
"h"
:
8
,
"w"
:
12
,
"x"
:
0
,
"y"
:
0
"y"
:
1
},
"id"
:
1
,
"id"
:
1
0
,
"options"
:
{
"legend"
:
{
"calcs"
:
[],
...
...
@@ -106,7 +119,7 @@
{
"disableTextWrap"
:
false
,
"editorMode"
:
"builder"
,
"expr"
:
"dynamo_component_
save_kv_layer_request
s{dynamo_namespace=
\"
kvbm_connector_
work
er
\"
}"
,
"expr"
:
"dynamo_component_
matched_token
s{dynamo_namespace=
\"
kvbm_connector_
lead
er
\"
}"
,
"fullMetaSearch"
:
false
,
"includeNullMetadata"
:
true
,
"legendFormat"
:
"__auto"
,
...
...
@@ -115,9 +128,22 @@
"useBackend"
:
false
}
],
"title"
:
"
KVBM Worker: save kv layer request
s"
,
"title"
:
"
Matched Token
s"
,
"type"
:
"timeseries"
},
{
"collapsed"
:
false
,
"gridPos"
:
{
"h"
:
1
,
"w"
:
24
,
"x"
:
0
,
"y"
:
9
},
"id"
:
5
,
"panels"
:
[],
"title"
:
"Offload"
,
"type"
:
"row"
},
{
"datasource"
:
{
"type"
:
"prometheus"
,
...
...
@@ -181,7 +207,7 @@
"h"
:
8
,
"w"
:
12
,
"x"
:
0
,
"y"
:
8
"y"
:
10
},
"id"
:
2
,
"options"
:
{
...
...
@@ -211,7 +237,500 @@
"useBackend"
:
false
}
],
"title"
:
"KVBM Leader: offload requests"
,
"title"
:
"Offload Requests"
,
"type"
:
"timeseries"
},
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"P1809F7CD0C75ACF3"
},
"fieldConfig"
:
{
"defaults"
:
{
"color"
:
{
"mode"
:
"palette-classic"
},
"custom"
:
{
"axisBorderShow"
:
false
,
"axisCenteredZero"
:
false
,
"axisColorMode"
:
"text"
,
"axisLabel"
:
""
,
"axisPlacement"
:
"auto"
,
"barAlignment"
:
0
,
"barWidthFactor"
:
0.6
,
"drawStyle"
:
"line"
,
"fillOpacity"
:
0
,
"gradientMode"
:
"none"
,
"hideFrom"
:
{
"legend"
:
false
,
"tooltip"
:
false
,
"viz"
:
false
},
"insertNulls"
:
false
,
"lineInterpolation"
:
"linear"
,
"lineWidth"
:
1
,
"pointSize"
:
5
,
"scaleDistribution"
:
{
"type"
:
"linear"
},
"showPoints"
:
"auto"
,
"spanNulls"
:
false
,
"stacking"
:
{
"group"
:
"A"
,
"mode"
:
"none"
},
"thresholdsStyle"
:
{
"mode"
:
"off"
}
},
"mappings"
:
[],
"thresholds"
:
{
"mode"
:
"absolute"
,
"steps"
:
[
{
"color"
:
"green"
},
{
"color"
:
"red"
,
"value"
:
80
}
]
}
},
"overrides"
:
[]
},
"gridPos"
:
{
"h"
:
8
,
"w"
:
12
,
"x"
:
12
,
"y"
:
10
},
"id"
:
3
,
"options"
:
{
"legend"
:
{
"calcs"
:
[],
"displayMode"
:
"list"
,
"placement"
:
"bottom"
,
"showLegend"
:
true
},
"tooltip"
:
{
"hideZeros"
:
false
,
"mode"
:
"single"
,
"sort"
:
"none"
}
},
"pluginVersion"
:
"12.0.1"
,
"targets"
:
[
{
"disableTextWrap"
:
false
,
"editorMode"
:
"builder"
,
"expr"
:
"dynamo_component_offload_blocks_d2h{dynamo_namespace=
\"
kvbm_connector_leader
\"
}"
,
"fullMetaSearch"
:
false
,
"includeNullMetadata"
:
true
,
"legendFormat"
:
"__auto"
,
"range"
:
true
,
"refId"
:
"A"
,
"useBackend"
:
false
}
],
"title"
:
"Offload Blocks"
,
"type"
:
"timeseries"
},
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"P1809F7CD0C75ACF3"
},
"fieldConfig"
:
{
"defaults"
:
{
"color"
:
{
"mode"
:
"palette-classic"
},
"custom"
:
{
"axisBorderShow"
:
false
,
"axisCenteredZero"
:
false
,
"axisColorMode"
:
"text"
,
"axisLabel"
:
""
,
"axisPlacement"
:
"auto"
,
"barAlignment"
:
0
,
"barWidthFactor"
:
0.6
,
"drawStyle"
:
"line"
,
"fillOpacity"
:
0
,
"gradientMode"
:
"none"
,
"hideFrom"
:
{
"legend"
:
false
,
"tooltip"
:
false
,
"viz"
:
false
},
"insertNulls"
:
false
,
"lineInterpolation"
:
"linear"
,
"lineWidth"
:
1
,
"pointSize"
:
5
,
"scaleDistribution"
:
{
"type"
:
"linear"
},
"showPoints"
:
"auto"
,
"spanNulls"
:
false
,
"stacking"
:
{
"group"
:
"A"
,
"mode"
:
"none"
},
"thresholdsStyle"
:
{
"mode"
:
"off"
}
},
"mappings"
:
[],
"thresholds"
:
{
"mode"
:
"absolute"
,
"steps"
:
[
{
"color"
:
"green"
},
{
"color"
:
"red"
,
"value"
:
80
}
]
}
},
"overrides"
:
[]
},
"gridPos"
:
{
"h"
:
8
,
"w"
:
12
,
"x"
:
0
,
"y"
:
18
},
"id"
:
1
,
"options"
:
{
"legend"
:
{
"calcs"
:
[],
"displayMode"
:
"list"
,
"placement"
:
"bottom"
,
"showLegend"
:
true
},
"tooltip"
:
{
"hideZeros"
:
false
,
"mode"
:
"single"
,
"sort"
:
"none"
}
},
"pluginVersion"
:
"12.0.1"
,
"targets"
:
[
{
"disableTextWrap"
:
false
,
"editorMode"
:
"builder"
,
"expr"
:
"dynamo_component_save_kv_layer_requests{dynamo_namespace=
\"
kvbm_connector_worker
\"
}"
,
"fullMetaSearch"
:
false
,
"includeNullMetadata"
:
true
,
"legendFormat"
:
"__auto"
,
"range"
:
true
,
"refId"
:
"A"
,
"useBackend"
:
false
}
],
"title"
:
"Save KV Layer Requests"
,
"type"
:
"timeseries"
},
{
"collapsed"
:
false
,
"gridPos"
:
{
"h"
:
1
,
"w"
:
24
,
"x"
:
0
,
"y"
:
26
},
"id"
:
6
,
"panels"
:
[],
"title"
:
"Onboard"
,
"type"
:
"row"
},
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"P1809F7CD0C75ACF3"
},
"fieldConfig"
:
{
"defaults"
:
{
"color"
:
{
"mode"
:
"palette-classic"
},
"custom"
:
{
"axisBorderShow"
:
false
,
"axisCenteredZero"
:
false
,
"axisColorMode"
:
"text"
,
"axisLabel"
:
""
,
"axisPlacement"
:
"auto"
,
"barAlignment"
:
0
,
"barWidthFactor"
:
0.6
,
"drawStyle"
:
"line"
,
"fillOpacity"
:
0
,
"gradientMode"
:
"none"
,
"hideFrom"
:
{
"legend"
:
false
,
"tooltip"
:
false
,
"viz"
:
false
},
"insertNulls"
:
false
,
"lineInterpolation"
:
"linear"
,
"lineWidth"
:
1
,
"pointSize"
:
5
,
"scaleDistribution"
:
{
"type"
:
"linear"
},
"showPoints"
:
"auto"
,
"spanNulls"
:
false
,
"stacking"
:
{
"group"
:
"A"
,
"mode"
:
"none"
},
"thresholdsStyle"
:
{
"mode"
:
"off"
}
},
"mappings"
:
[],
"thresholds"
:
{
"mode"
:
"absolute"
,
"steps"
:
[
{
"color"
:
"green"
},
{
"color"
:
"red"
,
"value"
:
80
}
]
}
},
"overrides"
:
[]
},
"gridPos"
:
{
"h"
:
8
,
"w"
:
12
,
"x"
:
0
,
"y"
:
27
},
"id"
:
9
,
"options"
:
{
"legend"
:
{
"calcs"
:
[],
"displayMode"
:
"list"
,
"placement"
:
"bottom"
,
"showLegend"
:
true
},
"tooltip"
:
{
"hideZeros"
:
false
,
"mode"
:
"single"
,
"sort"
:
"none"
}
},
"pluginVersion"
:
"12.0.1"
,
"targets"
:
[
{
"disableTextWrap"
:
false
,
"editorMode"
:
"builder"
,
"expr"
:
"dynamo_component_onboard_requests{dynamo_namespace=
\"
kvbm_connector_leader
\"
}"
,
"fullMetaSearch"
:
false
,
"includeNullMetadata"
:
true
,
"legendFormat"
:
"__auto"
,
"range"
:
true
,
"refId"
:
"A"
,
"useBackend"
:
false
}
],
"title"
:
"Onboard Requests"
,
"type"
:
"timeseries"
},
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"P1809F7CD0C75ACF3"
},
"fieldConfig"
:
{
"defaults"
:
{
"color"
:
{
"mode"
:
"palette-classic"
},
"custom"
:
{
"axisBorderShow"
:
false
,
"axisCenteredZero"
:
false
,
"axisColorMode"
:
"text"
,
"axisLabel"
:
""
,
"axisPlacement"
:
"auto"
,
"barAlignment"
:
0
,
"barWidthFactor"
:
0.6
,
"drawStyle"
:
"line"
,
"fillOpacity"
:
0
,
"gradientMode"
:
"none"
,
"hideFrom"
:
{
"legend"
:
false
,
"tooltip"
:
false
,
"viz"
:
false
},
"insertNulls"
:
false
,
"lineInterpolation"
:
"linear"
,
"lineWidth"
:
1
,
"pointSize"
:
5
,
"scaleDistribution"
:
{
"type"
:
"linear"
},
"showPoints"
:
"auto"
,
"spanNulls"
:
false
,
"stacking"
:
{
"group"
:
"A"
,
"mode"
:
"none"
},
"thresholdsStyle"
:
{
"mode"
:
"off"
}
},
"mappings"
:
[],
"thresholds"
:
{
"mode"
:
"absolute"
,
"steps"
:
[
{
"color"
:
"green"
},
{
"color"
:
"red"
,
"value"
:
80
}
]
}
},
"overrides"
:
[]
},
"gridPos"
:
{
"h"
:
8
,
"w"
:
12
,
"x"
:
12
,
"y"
:
27
},
"id"
:
4
,
"options"
:
{
"legend"
:
{
"calcs"
:
[],
"displayMode"
:
"list"
,
"placement"
:
"bottom"
,
"showLegend"
:
true
},
"tooltip"
:
{
"hideZeros"
:
false
,
"mode"
:
"single"
,
"sort"
:
"none"
}
},
"pluginVersion"
:
"12.0.1"
,
"targets"
:
[
{
"disableTextWrap"
:
false
,
"editorMode"
:
"builder"
,
"expr"
:
"dynamo_component_onboard_blocks_h2d{dynamo_namespace=
\"
kvbm_connector_leader
\"
}"
,
"fullMetaSearch"
:
false
,
"includeNullMetadata"
:
true
,
"legendFormat"
:
"__auto"
,
"range"
:
true
,
"refId"
:
"A"
,
"useBackend"
:
false
}
],
"title"
:
"Onboard Blocks - Host to Device"
,
"type"
:
"timeseries"
},
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"P1809F7CD0C75ACF3"
},
"fieldConfig"
:
{
"defaults"
:
{
"color"
:
{
"mode"
:
"palette-classic"
},
"custom"
:
{
"axisBorderShow"
:
false
,
"axisCenteredZero"
:
false
,
"axisColorMode"
:
"text"
,
"axisLabel"
:
""
,
"axisPlacement"
:
"auto"
,
"barAlignment"
:
0
,
"barWidthFactor"
:
0.6
,
"drawStyle"
:
"line"
,
"fillOpacity"
:
0
,
"gradientMode"
:
"none"
,
"hideFrom"
:
{
"legend"
:
false
,
"tooltip"
:
false
,
"viz"
:
false
},
"insertNulls"
:
false
,
"lineInterpolation"
:
"linear"
,
"lineWidth"
:
1
,
"pointSize"
:
5
,
"scaleDistribution"
:
{
"type"
:
"linear"
},
"showPoints"
:
"auto"
,
"spanNulls"
:
false
,
"stacking"
:
{
"group"
:
"A"
,
"mode"
:
"none"
},
"thresholdsStyle"
:
{
"mode"
:
"off"
}
},
"mappings"
:
[],
"thresholds"
:
{
"mode"
:
"absolute"
,
"steps"
:
[
{
"color"
:
"green"
},
{
"color"
:
"red"
,
"value"
:
80
}
]
}
},
"overrides"
:
[]
},
"gridPos"
:
{
"h"
:
8
,
"w"
:
12
,
"x"
:
0
,
"y"
:
35
},
"id"
:
8
,
"options"
:
{
"legend"
:
{
"calcs"
:
[],
"displayMode"
:
"list"
,
"placement"
:
"bottom"
,
"showLegend"
:
true
},
"tooltip"
:
{
"hideZeros"
:
false
,
"mode"
:
"single"
,
"sort"
:
"none"
}
},
"pluginVersion"
:
"12.0.1"
,
"targets"
:
[
{
"disableTextWrap"
:
false
,
"editorMode"
:
"builder"
,
"expr"
:
"dynamo_component_onboard_blocks_d2d{dynamo_namespace=
\"
kvbm_connector_leader
\"
}"
,
"fullMetaSearch"
:
false
,
"includeNullMetadata"
:
true
,
"legendFormat"
:
"__auto"
,
"range"
:
true
,
"refId"
:
"A"
,
"useBackend"
:
false
}
],
"title"
:
"Onboard Blocks - Disk to Device"
,
"type"
:
"timeseries"
}
],
...
...
@@ -223,12 +742,12 @@
"list"
:
[]
},
"time"
:
{
"from"
:
"now-
15
m"
,
"from"
:
"now-
30
m"
,
"to"
:
"now"
},
"timepicker"
:
{},
"timezone"
:
"browser"
,
"title"
:
"KVBM Dashboard"
,
"uid"
:
"3f679257-70a5-402c-92b4-05382337b548"
,
"version"
:
7
}
"version"
:
5
}
\ No newline at end of file
lib/bindings/python/rust/llm/block_manager/vllm/connector/leader.rs
View file @
b39382ba
...
...
@@ -80,6 +80,7 @@ pub struct KvConnectorLeader {
inflight_requests
:
HashSet
<
String
>
,
onboarding_slots
:
HashSet
<
String
>
,
iteration_counter
:
u64
,
kvbm_metrics
:
KvbmMetrics
,
}
impl
KvConnectorLeader
{
...
...
@@ -114,12 +115,13 @@ impl KvConnectorLeader {
block_manager
.clone
(),
leader
,
drt
.clone
(),
kvbm_metrics
,
kvbm_metrics
.clone
()
,
),
block_size
,
inflight_requests
:
HashSet
::
new
(),
onboarding_slots
:
HashSet
::
new
(),
iteration_counter
:
0
,
kvbm_metrics
,
}
}
}
...
...
@@ -188,6 +190,9 @@ impl Leader for KvConnectorLeader {
"scheduling onboarding for {} external tokens"
,
num_external_tokens
);
self
.kvbm_metrics
.matched_tokens
.inc_by
(
num_external_tokens
as
u64
);
Ok
((
num_external_tokens
,
true
))
}
else
{
Ok
((
0
,
false
))
...
...
lib/bindings/python/rust/llm/block_manager/vllm/connector/leader/recorder.rs
View file @
b39382ba
...
...
@@ -124,12 +124,13 @@ impl KvConnectorLeaderRecorder {
block_manager
.clone
(),
leader
,
drt
.clone
(),
kvbm_metrics
,
kvbm_metrics
.clone
()
,
),
block_size
,
inflight_requests
:
HashSet
::
new
(),
onboarding_slots
:
HashSet
::
new
(),
iteration_counter
:
0
,
kvbm_metrics
,
};
let
(
unbounded_tx
,
unbounded_rx
)
=
mpsc
::
unbounded_channel
();
...
...
lib/bindings/python/rust/llm/block_manager/vllm/connector/leader/slot.rs
View file @
b39382ba
...
...
@@ -197,7 +197,7 @@ impl<R: RequestKey> ConnectorSlotManager<R> {
let
xfer_engine_task
=
CriticalTaskExecutionHandle
::
new_with_runtime
(
|
cancellation_token
|
async
move
{
xfer_engine
.execute
(
cancellation_token
,
drt_for_task
,
kvbm_metrics
.clone
()
)
.execute
(
cancellation_token
,
drt_for_task
,
kvbm_metrics
)
.await
},
primary_token
,
...
...
@@ -1042,6 +1042,9 @@ impl LocalTransferEngine {
let
leader_offload
=
Arc
::
clone
(
&
self
.leader
);
let
leader_onboard
=
Arc
::
clone
(
&
self
.leader
);
let
kvbm_metrics_onboard
=
kvbm_metrics
.clone
();
let
kvbm_metrics_offload
=
kvbm_metrics
.clone
();
let
onboard_task
=
CriticalTaskExecutionHandle
::
new_with_runtime
(
|
cancellation_token_onboard
|
async
move
{
while
let
Some
(
req
)
=
onboard_rx
.recv
()
.await
{
...
...
@@ -1049,7 +1052,10 @@ impl LocalTransferEngine {
tracing
::
debug!
(
"LocalOnboardTask: received cancellation signal"
);
break
;
}
if
let
Err
(
e
)
=
process_onboard_request
(
req
,
&
leader_onboard
)
.await
{
if
let
Err
(
e
)
=
process_onboard_request
(
req
,
&
leader_onboard
,
kvbm_metrics_onboard
.clone
())
.await
{
tracing
::
error!
(
"LocalOnboardTask: error processing request: {:?}"
,
e
);
}
}
...
...
@@ -1071,7 +1077,7 @@ impl LocalTransferEngine {
req
,
&
block_manager_offload
,
&
leader_offload
,
kvbm_metrics
.clone
(),
kvbm_metrics
_offload
.clone
(),
)
.await
{
...
...
@@ -1145,6 +1151,9 @@ async fn process_offload_request(
kvbm_metrics
:
KvbmMetrics
,
)
->
anyhow
::
Result
<
()
>
{
kvbm_metrics
.offload_requests
.inc
();
kvbm_metrics
.offload_blocks_d2h
.inc_by
(
offload_req
.block_ids
.len
()
as
u64
);
let
request_id
=
&
offload_req
.request_id
;
let
operation_id
=
&
offload_req
.operation_id
;
...
...
@@ -1154,7 +1163,6 @@ async fn process_offload_request(
offload_req
.block_ids
.len
()
);
// TODO: Implement actual offload logic
// 1. Acquire mutable host blocks
let
host_blocks
=
block_manager
.host
()
...
...
@@ -1250,7 +1258,19 @@ async fn process_offload_request(
async
fn
process_onboard_request
(
onboard_req
:
LocalOnboardRequest
,
leader
:
&
Arc
<
KvbmLeader
>
,
kvbm_metrics
:
KvbmMetrics
,
)
->
anyhow
::
Result
<
()
>
{
kvbm_metrics
.onboard_requests
.inc
();
if
onboard_req
.src_blocks
.storage_pool
()
==
BlockTransferPool
::
Host
{
kvbm_metrics
.onboard_blocks_h2d
.inc_by
(
onboard_req
.src_blocks
.len
()
as
u64
);
}
else
if
onboard_req
.src_blocks
.storage_pool
()
==
BlockTransferPool
::
Disk
{
kvbm_metrics
.onboard_blocks_d2d
.inc_by
(
onboard_req
.src_blocks
.len
()
as
u64
);
}
let
request_id
=
&
onboard_req
.request_id
;
let
operation_id
=
&
onboard_req
.operation_id
;
...
...
lib/bindings/python/rust/llm/block_manager/vllm/connector/worker.rs
View file @
b39382ba
...
...
@@ -265,7 +265,6 @@ impl Worker for KvConnectorWorker {
/// Trigger layer-wise completion signals.
/// Trigger block-wise completion signals after last layer.
fn
save_kv_layer
(
&
mut
self
,
_
layer_name
:
String
)
->
anyhow
::
Result
<
()
>
{
self
.kvbm_metrics.save_kv_layer_requests
.inc
();
self
.layers_complete
+=
1
;
if
self
.layers_complete
==
self
.kv_cache_layers
.len
()
{
let
offloading_operations
=
std
::
mem
::
take
(
&
mut
self
.offloading_operations
);
...
...
@@ -278,6 +277,7 @@ impl Worker for KvConnectorWorker {
self
.connector
.enqueue_request
(
operation
);
}
}
self
.kvbm_metrics.save_kv_layer_requests
.inc
();
Ok
(())
}
...
...
lib/llm/src/block_manager/metrics_kvbm.rs
View file @
b39382ba
...
...
@@ -6,8 +6,26 @@ use prometheus::IntCounter;
#[derive(Clone,
Debug)]
pub
struct
KvbmMetrics
{
// number of offload requests
pub
offload_requests
:
IntCounter
,
// number of blocks offloaded from device to host
pub
offload_blocks_d2h
:
IntCounter
,
// number of onboard requests
pub
onboard_requests
:
IntCounter
,
// number of blocks onboarded from host to device
pub
onboard_blocks_h2d
:
IntCounter
,
// number of blocks onboarded from disk to device
pub
onboard_blocks_d2d
:
IntCounter
,
// number of save kv layer requests
pub
save_kv_layer_requests
:
IntCounter
,
// number of matched tokens from KVBM
pub
matched_tokens
:
IntCounter
,
}
impl
KvbmMetrics
{
...
...
@@ -15,6 +33,30 @@ impl KvbmMetrics {
let
offload_requests
=
mr
.create_intcounter
(
"offload_requests"
,
"The number of offload requests"
,
&
[])
.unwrap
();
let
offload_blocks_d2h
=
mr
.create_intcounter
(
"offload_blocks_d2h"
,
"The number of offload blocks from device to host"
,
&
[],
)
.unwrap
();
let
onboard_requests
=
mr
.create_intcounter
(
"onboard_requests"
,
"The number of onboard requests"
,
&
[])
.unwrap
();
let
onboard_blocks_h2d
=
mr
.create_intcounter
(
"onboard_blocks_h2d"
,
"The number of onboard blocks from host to device"
,
&
[],
)
.unwrap
();
let
onboard_blocks_d2d
=
mr
.create_intcounter
(
"onboard_blocks_d2d"
,
"The number of onboard blocks from disk to device"
,
&
[],
)
.unwrap
();
let
save_kv_layer_requests
=
mr
.create_intcounter
(
"save_kv_layer_requests"
,
...
...
@@ -22,9 +64,17 @@ impl KvbmMetrics {
&
[],
)
.unwrap
();
let
matched_tokens
=
mr
.create_intcounter
(
"matched_tokens"
,
"The number of matched tokens"
,
&
[])
.unwrap
();
Self
{
offload_requests
,
offload_blocks_d2h
,
onboard_requests
,
onboard_blocks_h2d
,
onboard_blocks_d2d
,
save_kv_layer_requests
,
matched_tokens
,
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment