Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
43ff14ce
Commit
43ff14ce
authored
Mar 20, 2025
by
Yan Ru Pei
Committed by
GitHub
Mar 20, 2025
Browse files
chore: KV router Pythonic cleanups (#324)
parent
bb35f36f
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
31 additions
and
17 deletions
+31
-17
examples/llm/components/kv_router.py
examples/llm/components/kv_router.py
+31
-17
No files found.
examples/llm/components/kv_router.py
View file @
43ff14ce
...
@@ -113,6 +113,23 @@ class Router:
...
@@ -113,6 +113,23 @@ class Router:
metrics
:
AggregatedMetrics
|
None
,
metrics
:
AggregatedMetrics
|
None
,
token_length
:
int
,
token_length
:
int
,
):
):
"""The cost function for deciding the best worker to route a request to.
If there are multiple workers sharing the same optimal cost, then
one of them is randomly selected.
Args:
scores (OverlapScores | None): The number of matching blocks between
the request and the prefix cache of each worker.
metrics (AggregatedMetrics | None): Several worker metrics polled
by the `KvMetricsAggregator`, currently including the
GPU cache usage, number of waiting requests, and the
GPU prefix cache hit rate.
token_length (int): The number of tokens in the request.
Returns:
(str, float): The best worker id and the corresponding score.
"""
worker_scores
=
{}
worker_scores
=
{}
if
scores
:
if
scores
:
for
worker_id
,
score
in
scores
.
scores
.
items
():
for
worker_id
,
score
in
scores
.
scores
.
items
():
...
@@ -129,15 +146,15 @@ class Router:
...
@@ -129,15 +146,15 @@ class Router:
for
endpoint
in
metrics
.
endpoints
:
for
endpoint
in
metrics
.
endpoints
:
worker_id
=
endpoint
.
worker_id
worker_id
=
endpoint
.
worker_id
worker_metrics
[
worker_id
]
=
{
worker_metrics
[
worker_id
]
=
{
"gpu_cache_usage_perc"
:
endpoint
.
gpu_cache_usage_perc
"gpu_cache_usage_perc"
:
getattr
(
if
hasattr
(
endpoint
,
"gpu_cache_usage_perc"
)
endpoint
,
"gpu_cache_usage_perc"
,
0.0
else
0.0
,
)
,
"num_requests_waiting"
:
endpoint
.
num_requests_waiting
"num_requests_waiting"
:
getattr
(
if
hasattr
(
endpoint
,
"num_requests_waiting"
)
endpoint
,
"num_requests_waiting"
,
0.0
else
0.0
,
)
,
"gpu_prefix_cache_hit_rate"
:
endpoint
.
gpu_prefix_cache_hit_rate
"gpu_prefix_cache_hit_rate"
:
getattr
(
if
hasattr
(
endpoint
,
"gpu_prefix_cache_hit_rate"
)
endpoint
,
"gpu_prefix_cache_hit_rate"
,
0.0
else
0.0
,
)
,
}
}
max_waiting
=
max
(
max_waiting
=
max
(
max_waiting
,
worker_metrics
[
worker_id
][
"num_requests_waiting"
]
max_waiting
,
worker_metrics
[
worker_id
][
"num_requests_waiting"
]
...
@@ -179,14 +196,11 @@ class Router:
...
@@ -179,14 +196,11 @@ class Router:
return
""
return
""
# Select the worker with the highest logit
# Select the worker with the highest logit
if
worker_logits
:
max_logit
=
max
(
worker_logits
.
values
())
max_logit
=
max
(
worker_logits
.
values
())
best_workers
=
[
best_workers
=
[
wid
for
wid
,
logit
in
worker_logits
.
items
()
if
logit
==
max_logit
wid
for
wid
,
logit
in
worker_logits
.
items
()
if
logit
==
max_logit
]
]
best_worker_id
=
random
.
choice
(
best_workers
)
best_worker_id
=
random
.
choice
(
best_workers
)
else
:
best_worker_id
=
""
# Log the metrics for the selected worker
# Log the metrics for the selected worker
if
best_worker_id
:
if
best_worker_id
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment