Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
5b24b429
Unverified
Commit
5b24b429
authored
Dec 03, 2025
by
Yan Ru Pei
Committed by
GitHub
Dec 04, 2025
Browse files
chore: no need to arc wrap client (#4741)
Signed-off-by:
PeaBrane
<
yanrpei@gmail.com
>
parent
0651a4fe
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
7 additions
and
10 deletions
+7
-10
lib/bindings/c/src/lib.rs
lib/bindings/c/src/lib.rs
+1
-1
lib/llm/src/discovery/model_manager.rs
lib/llm/src/discovery/model_manager.rs
+1
-1
lib/llm/src/discovery/watcher.rs
lib/llm/src/discovery/watcher.rs
+2
-5
lib/llm/src/discovery/worker_monitor.rs
lib/llm/src/discovery/worker_monitor.rs
+3
-3
No files found.
lib/bindings/c/src/lib.rs
View file @
5b24b429
...
@@ -1031,7 +1031,7 @@ pub async fn create_worker_selection_pipeline_chat(
...
@@ -1031,7 +1031,7 @@ pub async fn create_worker_selection_pipeline_chat(
// Create worker monitor if busy_threshold is set
// Create worker monitor if busy_threshold is set
// Note: C bindings don't register with ModelManager, so HTTP endpoint won't see this
// Note: C bindings don't register with ModelManager, so HTTP endpoint won't see this
let
worker_monitor
=
busy_threshold
.map
(|
t
|
KvWorkerMonitor
::
new
(
Arc
::
new
(
client
.clone
()
)
,
t
));
let
worker_monitor
=
busy_threshold
.map
(|
t
|
KvWorkerMonitor
::
new
(
client
.clone
(),
t
));
let
engine
=
build_routed_pipeline
::
<
let
engine
=
build_routed_pipeline
::
<
NvCreateChatCompletionRequest
,
NvCreateChatCompletionRequest
,
...
...
lib/llm/src/discovery/model_manager.rs
View file @
5b24b429
...
@@ -531,7 +531,7 @@ impl ModelManager {
...
@@ -531,7 +531,7 @@ impl ModelManager {
pub
fn
get_or_create_worker_monitor
(
pub
fn
get_or_create_worker_monitor
(
&
self
,
&
self
,
model
:
&
str
,
model
:
&
str
,
client
:
Arc
<
Client
>
,
client
:
Client
,
threshold
:
f64
,
threshold
:
f64
,
)
->
KvWorkerMonitor
{
)
->
KvWorkerMonitor
{
let
mut
monitors
=
self
.worker_monitors
.write
();
let
mut
monitors
=
self
.worker_monitors
.write
();
...
...
lib/llm/src/discovery/watcher.rs
View file @
5b24b429
...
@@ -405,11 +405,8 @@ impl ModelWatcher {
...
@@ -405,11 +405,8 @@ impl ModelWatcher {
// Get or create the worker monitor for this model
// Get or create the worker monitor for this model
// This allows dynamic threshold updates via the ModelManager
// This allows dynamic threshold updates via the ModelManager
let
worker_monitor
=
self
.router_config.busy_threshold
.map
(|
threshold
|
{
let
worker_monitor
=
self
.router_config.busy_threshold
.map
(|
threshold
|
{
self
.manager
.get_or_create_worker_monitor
(
self
.manager
card
.name
(),
.get_or_create_worker_monitor
(
card
.name
(),
client
.clone
(),
threshold
)
Arc
::
new
(
client
.clone
()),
threshold
,
)
});
});
// Add chat engine only if the model supports chat
// Add chat engine only if the model supports chat
...
...
lib/llm/src/discovery/worker_monitor.rs
View file @
5b24b429
...
@@ -55,11 +55,11 @@ impl WorkerLoadState {
...
@@ -55,11 +55,11 @@ impl WorkerLoadState {
/// Worker monitor for tracking KV cache usage and busy states.
/// Worker monitor for tracking KV cache usage and busy states.
///
///
///
All fields are `Arc`, so cloning shares state
. This allows multiple pipelines
///
Cloning shares state via internal Arc-wrapped fields
. This allows multiple pipelines
/// (e.g., chat and completions) to share the same monitor instance.
/// (e.g., chat and completions) to share the same monitor instance.
#[derive(Clone)]
#[derive(Clone)]
pub
struct
KvWorkerMonitor
{
pub
struct
KvWorkerMonitor
{
client
:
Arc
<
Client
>
,
client
:
Client
,
worker_load_states
:
Arc
<
RwLock
<
HashMap
<
u64
,
WorkerLoadState
>>>
,
worker_load_states
:
Arc
<
RwLock
<
HashMap
<
u64
,
WorkerLoadState
>>>
,
/// Threshold stored as parts-per-10000 (e.g., 8500 = 0.85)
/// Threshold stored as parts-per-10000 (e.g., 8500 = 0.85)
busy_threshold
:
Arc
<
AtomicU32
>
,
busy_threshold
:
Arc
<
AtomicU32
>
,
...
@@ -72,7 +72,7 @@ impl KvWorkerMonitor {
...
@@ -72,7 +72,7 @@ impl KvWorkerMonitor {
///
///
/// The threshold (0.0-1.0) controls when workers are considered busy based on
/// The threshold (0.0-1.0) controls when workers are considered busy based on
/// KV cache utilization. It can be dynamically updated via `set_threshold()`.
/// KV cache utilization. It can be dynamically updated via `set_threshold()`.
pub
fn
new
(
client
:
Arc
<
Client
>
,
threshold
:
f64
)
->
Self
{
pub
fn
new
(
client
:
Client
,
threshold
:
f64
)
->
Self
{
Self
{
Self
{
client
,
client
,
worker_load_states
:
Arc
::
new
(
RwLock
::
new
(
HashMap
::
new
())),
worker_load_states
:
Arc
::
new
(
RwLock
::
new
(
HashMap
::
new
())),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment