Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
2d3fb39f
Unverified
Commit
2d3fb39f
authored
Jun 25, 2025
by
jain-ria
Committed by
GitHub
Jun 25, 2025
Browse files
fix: remove http endpoint for clearing kv blocks (#1629)
parent
e84b1e77
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
7 additions
and
32 deletions
+7
-32
launch/dynamo-run/src/input/http.rs
launch/dynamo-run/src/input/http.rs
+0
-2
lib/llm/src/http/service.rs
lib/llm/src/http/service.rs
+0
-1
lib/llm/src/http/service/clear_kv_blocks.rs
lib/llm/src/http/service/clear_kv_blocks.rs
+6
-4
lib/llm/src/http/service/service_v2.rs
lib/llm/src/http/service/service_v2.rs
+1
-25
No files found.
launch/dynamo-run/src/input/http.rs
View file @
2d3fb39f
...
...
@@ -29,14 +29,12 @@ pub async fn run(
engine_config
:
EngineConfig
,
template
:
Option
<
RequestTemplate
>
,
)
->
anyhow
::
Result
<
()
>
{
let
distributed_runtime
=
DistributedRuntime
::
from_settings
(
runtime
.clone
())
.await
?
;
let
http_service
=
service_v2
::
HttpService
::
builder
()
.port
(
flags
.http_port
)
.enable_chat_endpoints
(
true
)
.enable_cmpl_endpoints
(
true
)
.enable_embeddings_endpoints
(
true
)
.with_request_template
(
template
)
.runtime
(
Some
(
Arc
::
new
(
distributed_runtime
)))
.build
()
?
;
match
engine_config
{
EngineConfig
::
Dynamic
=>
{
...
...
lib/llm/src/http/service.rs
View file @
2d3fb39f
...
...
@@ -20,7 +20,6 @@
mod
openai
;
pub
mod
clear_kv_blocks
;
pub
mod
error
;
pub
mod
health
;
pub
mod
metrics
;
...
...
lib/llm/src/http/service/clear_kv_blocks.rs
View file @
2d3fb39f
...
...
@@ -20,6 +20,8 @@ use std::sync::Arc;
use
dynamo_runtime
::{
pipeline
::
PushRouter
,
stream
::
StreamExt
};
pub
const
CLEAR_KV_ENDPOINT
:
&
str
=
"clear_kv_blocks"
;
pub
fn
clear_kv_blocks_router
(
state
:
Arc
<
service_v2
::
State
>
,
path
:
Option
<
String
>
,
...
...
@@ -68,7 +70,7 @@ async fn clear_kv_blocks_handler(
message
:
Option
<
String
>
|
{
let
mut
result
=
json!
({
"name"
:
name
,
"endpoint"
:
format!
(
"{}/{}/
clear_kv_blocks"
,
ns
,
comp
),
"endpoint"
:
format!
(
"{}/{}/
{}"
,
ns
,
comp
,
CLEAR_KV_ENDPOINT
),
"status"
:
status
,
});
if
success
{
...
...
@@ -123,7 +125,7 @@ async fn clear_kv_blocks_handler(
};
let
endpoint
:
dynamo_runtime
::
component
::
Endpoint
=
component_obj
.endpoint
(
"clear_kv_blocks"
);
component_obj
.endpoint
(
CLEAR_KV_ENDPOINT
);
let
client
=
match
endpoint
.client
()
.await
{
Ok
(
c
)
=>
c
,
...
...
@@ -190,7 +192,7 @@ async fn clear_kv_blocks_handler(
let
instances_filtered
=
instances
.clone
()
.into_iter
()
.filter
(|
instance
|
instance
.endpoint
==
"clear_kv_blocks"
)
.filter
(|
instance
|
instance
.endpoint
==
CLEAR_KV_ENDPOINT
)
.collect
::
<
Vec
<
_
>>
();
if
instances_filtered
.is_empty
()
{
...
...
@@ -214,7 +216,7 @@ async fn clear_kv_blocks_handler(
for
instance
in
&
instances_filtered
{
let
instance_name
=
format!
(
"{}-instance-{}"
,
entry
.name
,
instance
.id
());
match
router
.
round_robin
(()
.into
())
.await
{
match
router
.
direct
(()
.into
(),
instance
.id
())
.await
{
Ok
(
mut
stream
)
=>
match
stream
.next
()
.await
{
Some
(
response
)
=>
{
add_worker_result
(
...
...
lib/llm/src/http/service/service_v2.rs
View file @
2d3fb39f
...
...
@@ -11,7 +11,6 @@ use crate::discovery::ModelManager;
use
crate
::
request_template
::
RequestTemplate
;
use
anyhow
::
Result
;
use
derive_builder
::
Builder
;
use
dynamo_runtime
::
DistributedRuntime
;
use
tokio
::
task
::
JoinHandle
;
use
tokio_util
::
sync
::
CancellationToken
;
...
...
@@ -19,7 +18,6 @@ use tokio_util::sync::CancellationToken;
pub
struct
State
{
metrics
:
Arc
<
Metrics
>
,
manager
:
Arc
<
ModelManager
>
,
runtime
:
Option
<
Arc
<
DistributedRuntime
>>
,
}
impl
State
{
...
...
@@ -27,15 +25,6 @@ impl State {
Self
{
manager
,
metrics
:
Arc
::
new
(
Metrics
::
default
()),
runtime
:
None
,
}
}
pub
fn
with_runtime
(
manager
:
Arc
<
ModelManager
>
,
runtime
:
Arc
<
DistributedRuntime
>
)
->
Self
{
Self
{
manager
,
metrics
:
Arc
::
new
(
Metrics
::
default
()),
runtime
:
Some
(
runtime
),
}
}
...
...
@@ -52,11 +41,6 @@ impl State {
self
.manager
.clone
()
}
/// Get the DistributedRuntime if available
pub
fn
runtime
(
&
self
)
->
Option
<&
DistributedRuntime
>
{
self
.runtime
.as_ref
()
.map
(|
r
|
r
.as_ref
())
}
// TODO
pub
fn
sse_keep_alive
(
&
self
)
->
Option
<
Duration
>
{
None
...
...
@@ -96,9 +80,6 @@ pub struct HttpServiceConfig {
#[builder(default
=
"None"
)]
request_template
:
Option
<
RequestTemplate
>
,
#[builder(default
=
"None"
)]
runtime
:
Option
<
Arc
<
DistributedRuntime
>>
,
}
impl
HttpService
{
...
...
@@ -153,11 +134,7 @@ impl HttpServiceConfigBuilder {
let
config
:
HttpServiceConfig
=
self
.build_internal
()
?
;
let
model_manager
=
Arc
::
new
(
ModelManager
::
new
());
let
state
=
if
let
Some
(
runtime
)
=
config
.runtime
{
Arc
::
new
(
State
::
with_runtime
(
model_manager
,
runtime
))
}
else
{
Arc
::
new
(
State
::
new
(
model_manager
))
};
let
state
=
Arc
::
new
(
State
::
new
(
model_manager
));
// enable prometheus metrics
let
registry
=
metrics
::
Registry
::
new
();
...
...
@@ -171,7 +148,6 @@ impl HttpServiceConfigBuilder {
metrics
::
router
(
registry
,
None
),
super
::
openai
::
list_models_router
(
state
.clone
(),
None
),
super
::
health
::
health_check_router
(
state
.clone
(),
None
),
super
::
clear_kv_blocks
::
clear_kv_blocks_router
(
state
.clone
(),
None
),
];
if
config
.enable_chat_endpoints
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment