Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
e42746a1
Unverified
Commit
e42746a1
authored
Sep 10, 2025
by
Michael Feil
Committed by
GitHub
Sep 10, 2025
Browse files
feat: frontend, disconnect metrics (#1) (#2953)
Signed-off-by:
michaelfeil
<
me@michaelfeil.eu
>
parent
4b3a2c1a
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
37 additions
and
5 deletions
+37
-5
lib/llm/src/grpc/service/openai.rs
lib/llm/src/grpc/service/openai.rs
+2
-1
lib/llm/src/http/service/disconnect.rs
lib/llm/src/http/service/disconnect.rs
+10
-1
lib/llm/src/http/service/metrics.rs
lib/llm/src/http/service/metrics.rs
+19
-0
lib/llm/src/http/service/openai.rs
lib/llm/src/http/service/openai.rs
+6
-3
No files found.
lib/llm/src/grpc/service/openai.rs
View file @
e42746a1
...
@@ -52,7 +52,8 @@ pub async fn completion_response_stream(
...
@@ -52,7 +52,8 @@ pub async fn completion_response_stream(
let
context
=
request
.context
();
let
context
=
request
.context
();
// create the connection handles
// create the connection handles
let
(
mut
connection_handle
,
stream_handle
)
=
create_connection_monitor
(
context
.clone
())
.await
;
let
(
mut
connection_handle
,
stream_handle
)
=
create_connection_monitor
(
context
.clone
(),
Some
(
state
.metrics_clone
()))
.await
;
let
streaming
=
request
.inner.stream
.unwrap_or
(
false
);
let
streaming
=
request
.inner.stream
.unwrap_or
(
false
);
// update the request to always stream
// update the request to always stream
...
...
lib/llm/src/http/service/disconnect.rs
View file @
e42746a1
...
@@ -33,7 +33,7 @@ use dynamo_runtime::engine::AsyncEngineContext;
...
@@ -33,7 +33,7 @@ use dynamo_runtime::engine::AsyncEngineContext;
use
futures
::{
Stream
,
StreamExt
};
use
futures
::{
Stream
,
StreamExt
};
use
std
::
sync
::
Arc
;
use
std
::
sync
::
Arc
;
use
crate
::
http
::
service
::
metrics
::
InflightGuard
;
use
crate
::
http
::
service
::
metrics
::
{
InflightGuard
,
Metrics
}
;
#[derive(Clone,
Copy)]
#[derive(Clone,
Copy)]
pub
enum
ConnectionStatus
{
pub
enum
ConnectionStatus
{
...
@@ -99,6 +99,7 @@ impl Drop for ConnectionHandle {
...
@@ -99,6 +99,7 @@ impl Drop for ConnectionHandle {
/// The handles are returned in the order of the first being armed and the second being disarmed.
/// The handles are returned in the order of the first being armed and the second being disarmed.
pub
async
fn
create_connection_monitor
(
pub
async
fn
create_connection_monitor
(
engine_context
:
Arc
<
dyn
AsyncEngineContext
>
,
engine_context
:
Arc
<
dyn
AsyncEngineContext
>
,
metrics
:
Option
<
Arc
<
Metrics
>>
,
)
->
(
ConnectionHandle
,
ConnectionHandle
)
{
)
->
(
ConnectionHandle
,
ConnectionHandle
)
{
// these oneshot channels monitor possible disconnects from the client in two different scopes:
// these oneshot channels monitor possible disconnects from the client in two different scopes:
// - the local task (connection_handle)
// - the local task (connection_handle)
...
@@ -111,6 +112,7 @@ pub async fn create_connection_monitor(
...
@@ -111,6 +112,7 @@ pub async fn create_connection_monitor(
engine_context
.clone
(),
engine_context
.clone
(),
connection_rx
,
connection_rx
,
stream_rx
,
stream_rx
,
metrics
,
));
));
// Two handles, the first is armed, the second is disarmed
// Two handles, the first is armed, the second is disarmed
...
@@ -125,11 +127,15 @@ async fn connection_monitor(
...
@@ -125,11 +127,15 @@ async fn connection_monitor(
engine_context
:
Arc
<
dyn
AsyncEngineContext
>
,
engine_context
:
Arc
<
dyn
AsyncEngineContext
>
,
connection_rx
:
tokio
::
sync
::
oneshot
::
Receiver
<
ConnectionStatus
>
,
connection_rx
:
tokio
::
sync
::
oneshot
::
Receiver
<
ConnectionStatus
>
,
stream_rx
:
tokio
::
sync
::
oneshot
::
Receiver
<
ConnectionStatus
>
,
stream_rx
:
tokio
::
sync
::
oneshot
::
Receiver
<
ConnectionStatus
>
,
metrics
:
Option
<
Arc
<
Metrics
>>
,
)
{
)
{
match
connection_rx
.await
{
match
connection_rx
.await
{
Err
(
_
)
|
Ok
(
ConnectionStatus
::
ClosedUnexpectedly
)
=>
{
Err
(
_
)
|
Ok
(
ConnectionStatus
::
ClosedUnexpectedly
)
=>
{
// the client has disconnected, no need to gracefully cancel, just kill the context
// the client has disconnected, no need to gracefully cancel, just kill the context
tracing
::
trace!
(
"Connection closed unexpectedly; issuing cancellation"
);
tracing
::
trace!
(
"Connection closed unexpectedly; issuing cancellation"
);
if
let
Some
(
metrics
)
=
&
metrics
{
metrics
.inc_client_disconnect
();
}
engine_context
.kill
();
engine_context
.kill
();
}
}
Ok
(
ConnectionStatus
::
ClosedGracefully
)
=>
{
Ok
(
ConnectionStatus
::
ClosedGracefully
)
=>
{
...
@@ -141,6 +147,9 @@ async fn connection_monitor(
...
@@ -141,6 +147,9 @@ async fn connection_monitor(
match
stream_rx
.await
{
match
stream_rx
.await
{
Err
(
_
)
|
Ok
(
ConnectionStatus
::
ClosedUnexpectedly
)
=>
{
Err
(
_
)
|
Ok
(
ConnectionStatus
::
ClosedUnexpectedly
)
=>
{
tracing
::
trace!
(
"Stream closed unexpectedly; issuing cancellation"
);
tracing
::
trace!
(
"Stream closed unexpectedly; issuing cancellation"
);
if
let
Some
(
metrics
)
=
&
metrics
{
metrics
.inc_client_disconnect
();
}
engine_context
.kill
();
engine_context
.kill
();
}
}
Ok
(
ConnectionStatus
::
ClosedGracefully
)
=>
{
Ok
(
ConnectionStatus
::
ClosedGracefully
)
=>
{
...
...
lib/llm/src/http/service/metrics.rs
View file @
e42746a1
...
@@ -18,6 +18,7 @@ use super::RouteDoc;
...
@@ -18,6 +18,7 @@ use super::RouteDoc;
pub
struct
Metrics
{
pub
struct
Metrics
{
request_counter
:
IntCounterVec
,
request_counter
:
IntCounterVec
,
inflight_gauge
:
IntGaugeVec
,
inflight_gauge
:
IntGaugeVec
,
client_disconnect_gauge
:
prometheus
::
IntGauge
,
request_duration
:
HistogramVec
,
request_duration
:
HistogramVec
,
input_sequence_length
:
HistogramVec
,
input_sequence_length
:
HistogramVec
,
output_sequence_length
:
HistogramVec
,
output_sequence_length
:
HistogramVec
,
...
@@ -133,6 +134,12 @@ impl Metrics {
...
@@ -133,6 +134,12 @@ impl Metrics {
)
)
.unwrap
();
.unwrap
();
let
client_disconnect_gauge
=
prometheus
::
IntGauge
::
new
(
frontend_metric_name
(
"client_disconnects"
),
"Number of connections dropped by clients"
,
)
.unwrap
();
let
buckets
=
vec!
[
0.0
,
1.0
,
2.0
,
4.0
,
8.0
,
16.0
,
32.0
,
64.0
,
128.0
,
256.0
];
let
buckets
=
vec!
[
0.0
,
1.0
,
2.0
,
4.0
,
8.0
,
16.0
,
32.0
,
64.0
,
128.0
,
256.0
];
let
request_duration
=
HistogramVec
::
new
(
let
request_duration
=
HistogramVec
::
new
(
...
@@ -198,6 +205,7 @@ impl Metrics {
...
@@ -198,6 +205,7 @@ impl Metrics {
Metrics
{
Metrics
{
request_counter
,
request_counter
,
inflight_gauge
,
inflight_gauge
,
client_disconnect_gauge
,
request_duration
,
request_duration
,
input_sequence_length
,
input_sequence_length
,
output_sequence_length
,
output_sequence_length
,
...
@@ -263,9 +271,20 @@ impl Metrics {
...
@@ -263,9 +271,20 @@ impl Metrics {
self
.inflight_gauge
.with_label_values
(
&
[
model
])
.dec
()
self
.inflight_gauge
.with_label_values
(
&
[
model
])
.dec
()
}
}
/// Increment the gauge for client disconnections
pub
fn
inc_client_disconnect
(
&
self
)
{
self
.client_disconnect_gauge
.inc
();
}
/// Get the count of client disconnections
pub
fn
get_client_disconnect_count
(
&
self
)
->
i64
{
self
.client_disconnect_gauge
.get
()
}
pub
fn
register
(
&
self
,
registry
:
&
Registry
)
->
Result
<
(),
prometheus
::
Error
>
{
pub
fn
register
(
&
self
,
registry
:
&
Registry
)
->
Result
<
(),
prometheus
::
Error
>
{
registry
.register
(
Box
::
new
(
self
.request_counter
.clone
()))
?
;
registry
.register
(
Box
::
new
(
self
.request_counter
.clone
()))
?
;
registry
.register
(
Box
::
new
(
self
.inflight_gauge
.clone
()))
?
;
registry
.register
(
Box
::
new
(
self
.inflight_gauge
.clone
()))
?
;
registry
.register
(
Box
::
new
(
self
.client_disconnect_gauge
.clone
()))
?
;
registry
.register
(
Box
::
new
(
self
.request_duration
.clone
()))
?
;
registry
.register
(
Box
::
new
(
self
.request_duration
.clone
()))
?
;
registry
.register
(
Box
::
new
(
self
.input_sequence_length
.clone
()))
?
;
registry
.register
(
Box
::
new
(
self
.input_sequence_length
.clone
()))
?
;
registry
.register
(
Box
::
new
(
self
.output_sequence_length
.clone
()))
?
;
registry
.register
(
Box
::
new
(
self
.output_sequence_length
.clone
()))
?
;
...
...
lib/llm/src/http/service/openai.rs
View file @
e42746a1
...
@@ -224,7 +224,8 @@ async fn handler_completions(
...
@@ -224,7 +224,8 @@ async fn handler_completions(
let
context
=
request
.context
();
let
context
=
request
.context
();
// create the connection handles
// create the connection handles
let
(
mut
connection_handle
,
stream_handle
)
=
create_connection_monitor
(
context
.clone
())
.await
;
let
(
mut
connection_handle
,
stream_handle
)
=
create_connection_monitor
(
context
.clone
(),
Some
(
state
.metrics_clone
()))
.await
;
// possibly long running task
// possibly long running task
// if this returns a streaming response, the stream handle will be armed and captured by the response stream
// if this returns a streaming response, the stream handle will be armed and captured by the response stream
...
@@ -419,7 +420,8 @@ async fn handler_chat_completions(
...
@@ -419,7 +420,8 @@ async fn handler_chat_completions(
let
context
=
request
.context
();
let
context
=
request
.context
();
// create the connection handles
// create the connection handles
let
(
mut
connection_handle
,
stream_handle
)
=
create_connection_monitor
(
context
.clone
())
.await
;
let
(
mut
connection_handle
,
stream_handle
)
=
create_connection_monitor
(
context
.clone
(),
Some
(
state
.metrics_clone
()))
.await
;
let
response
=
let
response
=
tokio
::
spawn
(
chat_completions
(
state
,
template
,
request
,
stream_handle
)
.in_current_span
())
tokio
::
spawn
(
chat_completions
(
state
,
template
,
request
,
stream_handle
)
.in_current_span
())
...
@@ -651,7 +653,8 @@ async fn handler_responses(
...
@@ -651,7 +653,8 @@ async fn handler_responses(
let
context
=
request
.context
();
let
context
=
request
.context
();
// create the connection handles
// create the connection handles
let
(
mut
connection_handle
,
_
stream_handle
)
=
create_connection_monitor
(
context
.clone
())
.await
;
let
(
mut
connection_handle
,
_
stream_handle
)
=
create_connection_monitor
(
context
.clone
(),
Some
(
state
.metrics_clone
()))
.await
;
let
response
=
tokio
::
spawn
(
responses
(
state
,
template
,
request
)
.in_current_span
())
let
response
=
tokio
::
spawn
(
responses
(
state
,
template
,
request
)
.in_current_span
())
.await
.await
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment