Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
ad8ad66b
Unverified
Commit
ad8ad66b
authored
Jul 14, 2025
by
Graham King
Committed by
GitHub
Jul 14, 2025
Browse files
feat: Shrink the ai-dynamo wheel by 35 MiB (#1918)
Remove http and llmctl binaries. They have been unused for a while.
parent
480b41d1
Changes
20
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1 addition
and
624 deletions
+1
-624
.devcontainer/post-create.sh
.devcontainer/post-create.sh
+0
-2
Cargo.lock
Cargo.lock
+0
-67
Cargo.toml
Cargo.toml
+0
-1
Earthfile
Earthfile
+1
-3
README.md
README.md
+0
-2
components/http/Cargo.toml
components/http/Cargo.toml
+0
-34
components/http/src/main.rs
components/http/src/main.rs
+0
-73
container/Dockerfile.none
container/Dockerfile.none
+0
-2
container/Dockerfile.sglang
container/Dockerfile.sglang
+0
-2
container/Dockerfile.sglang-deepep
container/Dockerfile.sglang-deepep
+0
-2
container/Dockerfile.tensorrt_llm
container/Dockerfile.tensorrt_llm
+0
-2
container/Dockerfile.vllm
container/Dockerfile.vllm
+0
-2
container/Dockerfile.vllm_v1
container/Dockerfile.vllm_v1
+0
-2
deploy/sdk/src/dynamo/sdk/cli/run_executable.py
deploy/sdk/src/dynamo/sdk/cli/run_executable.py
+0
-22
docs/get_started.md
docs/get_started.md
+0
-2
docs/guides/backend.md
docs/guides/backend.md
+0
-4
hatch_build.py
hatch_build.py
+0
-2
launch/llmctl/Cargo.toml
launch/llmctl/Cargo.toml
+0
-36
launch/llmctl/src/main.rs
launch/llmctl/src/main.rs
+0
-362
pyproject.toml
pyproject.toml
+0
-2
No files found.
.devcontainer/post-create.sh
View file @
ad8ad66b
...
...
@@ -55,8 +55,6 @@ cargo doc --no-deps
# create symlinks for the binaries in the deploy directory
mkdir
-p
$HOME
/dynamo/deploy/sdk/src/dynamo/sdk/cli/bin
ln
-sf
$HOME
/dynamo/.build/target/debug/dynamo-run
$HOME
/dynamo/deploy/sdk/src/dynamo/sdk/cli/bin/dynamo-run
ln
-sf
$HOME
/dynamo/.build/target/debug/http
$HOME
/dynamo/deploy/sdk/src/dynamo/sdk/cli/bin/http
ln
-sf
$HOME
/dynamo/.build/target/debug/llmctl
$HOME
/dynamo/deploy/sdk/src/dynamo/sdk/cli/bin/llmctl
# install the python bindings
cd
$HOME
/dynamo/lib/bindings/python
&&
retry maturin develop
...
...
Cargo.lock
View file @
ad8ad66b
...
...
@@ -714,12 +714,6 @@ version = "3.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf"
[[package]]
name = "bytecount"
version = "0.6.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce"
[[package]]
name = "bytemuck"
version = "1.23.1"
...
...
@@ -2973,18 +2967,6 @@ dependencies = [
"itoa",
]
[[package]]
name = "http"
version = "0.3.2"
dependencies = [
"clap 4.5.40",
"dynamo-llm",
"dynamo-runtime",
"serde",
"serde_json",
"tokio",
]
[[package]]
name = "http"
version = "1.3.1"
...
...
@@ -3730,21 +3712,6 @@ dependencies = [
"toktrie 0.7.29",
]
[[package]]
name = "llmctl"
version = "0.3.2"
dependencies = [
"anyhow",
"clap 4.5.40",
"dynamo-llm",
"dynamo-runtime",
"serde",
"serde_json",
"tabled",
"tokio",
"tracing",
]
[[package]]
name = "local-ip-address"
version = "0.6.4"
...
...
@@ -4725,17 +4692,6 @@ dependencies = [
"serde",
]
[[package]]
name = "papergrid"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b915f831b85d984193fdc3d3611505871dc139b2534530fa01c1a6a6707b6723"
dependencies = [
"bytecount",
"fnv",
"unicode-width 0.2.0",
]
[[package]]
name = "parking_lot"
version = "0.12.4"
...
...
@@ -6848,29 +6804,6 @@ dependencies = [
"version-compare",
]
[[package]]
name = "tabled"
version = "0.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "121d8171ee5687a4978d1b244f7d99c43e7385a272185a2f1e1fa4dc0979d444"
dependencies = [
"papergrid",
"tabled_derive",
]
[[package]]
name = "tabled_derive"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52d9946811baad81710ec921809e2af67ad77719418673b2a3794932d57b7538"
dependencies = [
"heck 0.5.0",
"proc-macro-error2",
"proc-macro2",
"quote",
"syn 2.0.100",
]
[[package]]
name = "target-lexicon"
version = "0.12.16"
...
...
Cargo.toml
View file @
ad8ad66b
...
...
@@ -15,7 +15,6 @@
[workspace]
members
=
[
"components/http"
,
"components/metrics"
,
"components/router"
,
"launch/*"
,
...
...
Earthfile
View file @
ad8ad66b
...
...
@@ -120,9 +120,7 @@ dynamo-build:
# Remove existing symlinks
rm -f /workspace/deploy/sdk/src/dynamo/sdk/cli/bin/* && \
# Create new symlinks pointing to the correct location
ln -sf /workspace/target/release/dynamo-run /workspace/deploy/sdk/src/dynamo/sdk/cli/bin/dynamo-run && \
ln -sf /workspace/target/release/http /workspace/deploy/sdk/src/dynamo/sdk/cli/bin/http && \
ln -sf /workspace/target/release/llmctl /workspace/deploy/sdk/src/dynamo/sdk/cli/bin/llmctl
ln -sf /workspace/target/release/dynamo-run /workspace/deploy/sdk/src/dynamo/sdk/cli/bin/dynamo-run
RUN cd /workspace/lib/bindings/python && \
...
...
README.md
View file @
ad8ad66b
...
...
@@ -172,8 +172,6 @@ Otherwise, to develop locally, we recommend working inside of the container
cargo build
--release
mkdir
-p
/workspace/deploy/sdk/src/dynamo/sdk/cli/bin
cp
/workspace/target/release/http /workspace/deploy/sdk/src/dynamo/sdk/cli/bin
cp
/workspace/target/release/llmctl /workspace/deploy/sdk/src/dynamo/sdk/cli/bin
cp
/workspace/target/release/dynamo-run /workspace/deploy/sdk/src/dynamo/sdk/cli/bin
uv pip
install
-e
.
...
...
components/http/Cargo.toml
deleted
100644 → 0
View file @
480b41d1
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[package]
name
=
"http"
version.workspace
=
true
edition.workspace
=
true
authors.workspace
=
true
license.workspace
=
true
homepage.workspace
=
true
repository.workspace
=
true
[dependencies]
dynamo-runtime
=
{
workspace
=
true
}
dynamo-llm
=
{
workspace
=
true
}
serde
=
{
workspace
=
true
}
serde_json
=
{
workspace
=
true
}
tokio
=
{
workspace
=
true
}
clap
=
{
version
=
"4.5"
,
features
=
["derive"]
}
components/http/src/main.rs
deleted
100644 → 0
View file @
480b41d1
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use
clap
::
Parser
;
use
dynamo_llm
::
discovery
::{
ModelWatcher
,
MODEL_ROOT_PATH
};
use
dynamo_llm
::
http
::
service
::
service_v2
::
HttpService
;
use
dynamo_runtime
::{
logging
,
pipeline
::
RouterMode
,
transports
::
etcd
::
PrefixWatcher
,
DistributedRuntime
,
Result
,
Runtime
,
Worker
,
};
// Command-line options for the standalone `http` binary.
// The `///` lines are deliberately left untouched: clap's derive turns them
// into the per-flag help text, so editing them would change program output.
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// Host for the HTTP service
    #[arg(long, default_value = "0.0.0.0")]
    host: String,

    /// Port number for the HTTP service
    #[arg(short, long, default_value = "8080")]
    port: u16,

    /// Namespace for the distributed component
    // NOTE(review): `app` only reads `host` and `port`; this field is parsed
    // but not used in the visible part of this file.
    #[arg(long, default_value = "public")]
    namespace: String,

    /// Component name for the service
    // NOTE(review): parsed but not read in the visible part of this file.
    #[arg(long, default_value = "http")]
    component: String,
}
/// Binary entry point.
///
/// Initialises logging, obtains a `Worker` through `Worker::from_current`,
/// and awaits `app` running on it. Any error from either step is returned.
#[tokio::main]
async fn main() -> Result<()> {
    logging::init();
    Worker::from_current()?.execute_async(app).await
}
/// Builds and runs the HTTP service on top of the given runtime.
///
/// Steps, in order: create the `DistributedRuntime` from settings, parse the
/// command line, build the `HttpService` from `--port` / `--host`, and, when
/// an etcd client is available, spawn a background task that feeds model
/// updates under `MODEL_ROOT_PATH` into a `ModelWatcher`. Finally the service
/// is run with a child token of `runtime`; that call's result is returned.
async fn app(runtime: Runtime) -> Result<()> {
    let drt = DistributedRuntime::from_settings(runtime.clone()).await?;
    let cli = Args::parse();

    let service = HttpService::builder()
        .port(cli.port)
        .host(cli.host)
        .build()?;
    let manager = service.state().manager_clone();

    // Outstanding design TODOs carried over from the original author:
    //  - register the component through the IntoComponent trait
    //  - start a service that creates an entry and registers a component
    //    definition (the component's type plus its config)
    //  - here that would be an HttpServiceComponentDefinition written to etcd;
    //    the CLI, when operating on an `http` component, would validate that
    //    namespace.component is registered with that definition
    let model_watcher = ModelWatcher::new(drt.clone(), manager, RouterMode::Random, None);

    if let Some(etcd) = drt.etcd_client() {
        let prefix_watcher: PrefixWatcher =
            etcd.kv_get_and_watch_prefix(MODEL_ROOT_PATH).await?;
        // `_prefix` and `_watcher` are named bindings (not `_`), so they stay
        // alive until the end of this block, exactly as before.
        let (_prefix, _watcher, receiver) = prefix_watcher.dissolve();
        tokio::spawn(async move {
            model_watcher.watch(receiver).await;
        });
    }

    // Run the service
    service.run(runtime.child_token()).await
}
container/Dockerfile.none
View file @
ad8ad66b
...
...
@@ -49,8 +49,6 @@ ENV CARGO_TARGET_DIR=/workspace/target
RUN cargo build --release --locked && \
cargo doc --no-deps && \
cp target/release/dynamo-run /usr/local/bin && \
cp target/release/http /usr/local/bin && \
cp target/release/llmctl /usr/local/bin && \
cp target/release/metrics /usr/local/bin && \
cp target/release/mock_worker /usr/local/bin
...
...
container/Dockerfile.sglang
View file @
ad8ad66b
...
...
@@ -367,8 +367,6 @@ RUN mkdir -p /opt/dynamo/bindings/wheels && \
cp target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \
cp -r lib/bindings/c/include /opt/dynamo/bindings/. && \
cp target/release/dynamo-run /usr/local/bin && \
cp target/release/http /usr/local/bin && \
cp target/release/llmctl /usr/local/bin && \
cp target/release/metrics /usr/local/bin && \
cp target/release/mock_worker /usr/local/bin
...
...
container/Dockerfile.sglang-deepep
View file @
ad8ad66b
...
...
@@ -132,8 +132,6 @@ ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
RUN cargo build --release
RUN mkdir -p deploy/sdk/src/dynamo/sdk/cli/bin
RUN cp target/release/http deploy/sdk/src/dynamo/sdk/cli/bin
RUN cp target/release/llmctl deploy/sdk/src/dynamo/sdk/cli/bin
RUN cp target/release/dynamo-run deploy/sdk/src/dynamo/sdk/cli/bin
RUN cd lib/bindings/python && pip install --break-system-packages -e . && cd ../../..
...
...
container/Dockerfile.tensorrt_llm
View file @
ad8ad66b
...
...
@@ -317,8 +317,6 @@ RUN mkdir -p /opt/dynamo/bindings/wheels && \
cp target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \
cp -r lib/bindings/c/include /opt/dynamo/bindings/. && \
cp target/release/dynamo-run /usr/local/bin && \
cp target/release/http /usr/local/bin && \
cp target/release/llmctl /usr/local/bin && \
cp target/release/metrics /usr/local/bin && \
cp target/release/mock_worker /usr/local/bin
...
...
container/Dockerfile.vllm
View file @
ad8ad66b
...
...
@@ -432,8 +432,6 @@ RUN mkdir -p /opt/dynamo/bindings/wheels && \
cp target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \
cp -r lib/bindings/c/include /opt/dynamo/bindings/. && \
cp target/release/dynamo-run /usr/local/bin && \
cp target/release/http /usr/local/bin && \
cp target/release/llmctl /usr/local/bin && \
cp target/release/metrics /usr/local/bin && \
cp target/release/mock_worker /usr/local/bin
...
...
container/Dockerfile.vllm_v1
View file @
ad8ad66b
...
...
@@ -412,8 +412,6 @@ RUN mkdir -p /opt/dynamo/bindings/wheels && \
cp target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \
cp -r lib/bindings/c/include /opt/dynamo/bindings/. && \
cp target/release/dynamo-run /usr/local/bin && \
cp target/release/http /usr/local/bin && \
cp target/release/llmctl /usr/local/bin && \
cp target/release/metrics /usr/local/bin && \
cp target/release/mock_worker /usr/local/bin
...
...
deploy/sdk/src/dynamo/sdk/cli/run_executable.py
View file @
ad8ad66b
...
...
@@ -69,28 +69,6 @@ def dynamo_run(args=None):
return
result
.
returncode
def llmctl(args=None):
    """Run the ``llmctl`` executable and return its exit code.

    Args:
        args: Arguments to hand to the executable. When ``None``, the current
            process's ``sys.argv[1:]`` is forwarded instead.

    Returns:
        The ``returncode`` attribute of the value produced by
        ``run_executable``.
    """
    forwarded = sys.argv[1:] if args is None else args
    return run_executable("llmctl", args=forwarded, capture_output=False).returncode
def http(args=None):
    """Run the ``http`` executable and return its exit code.

    Args:
        args: Arguments to hand to the executable. When ``None``, the current
            process's ``sys.argv[1:]`` is forwarded instead.

    Returns:
        The ``returncode`` attribute of the value produced by
        ``run_executable``.
    """
    forwarded = sys.argv[1:] if args is None else args
    return run_executable("http", args=forwarded, capture_output=False).returncode
def
metrics
(
args
=
None
):
"""
Run the metrics executable with the provided arguments.
...
...
docs/get_started.md
View file @
ad8ad66b
...
...
@@ -206,8 +206,6 @@ Otherwise, to develop locally, we recommend working inside of the container:
cargo build
--release
mkdir
-p
/workspace/deploy/dynamo/sdk/src/dynamo/sdk/cli/bin
cp
/workspace/target/release/http /workspace/deploy/dynamo/sdk/src/dynamo/sdk/cli/bin
cp
/workspace/target/release/llmctl /workspace/deploy/dynamo/sdk/src/dynamo/sdk/cli/bin
cp
/workspace/target/release/dynamo-run /workspace/deploy/dynamo/sdk/src/dynamo/sdk/cli/bin
uv pip
install
-e
.
...
...
docs/guides/backend.md
View file @
ad8ad66b
...
...
@@ -92,10 +92,6 @@ class ResponseType(BaseModel):
# Add other fields as needed
```
For example, if you deploy your worker directly behind an OpenAI HTTP (
`http`
) service
using
`llmctl`
, you can define the request and response types to correspond to
Chat Completions objects, such as the ones specified in the OpenAI API. For example:
```
python
from
vllm.entrypoints.openai.protocol
import
ChatCompletionRequest
...
...
hatch_build.py
View file @
ad8ad66b
...
...
@@ -24,8 +24,6 @@ class CustomBuildHook(BuildHookInterface):
bin_path
=
os
.
getenv
(
"DYNAMO_BIN_PATH"
,
"target/release"
)
build_data
[
"force_include"
]
=
{
f
"
{
bin_path
}
/dynamo-run"
:
"dynamo/sdk/cli/bin/dynamo-run"
,
f
"
{
bin_path
}
/llmctl"
:
"dynamo/sdk/cli/bin/llmctl"
,
f
"
{
bin_path
}
/http"
:
"dynamo/sdk/cli/bin/http"
,
f
"
{
bin_path
}
/metrics"
:
"dynamo/sdk/cli/bin/metrics"
,
f
"
{
bin_path
}
/mock_worker"
:
"dynamo/sdk/cli/bin/mock_worker"
,
f
"
{
bin_path
}
/libdynamo_llm_capi.so"
:
"dynamo/sdk/cli/bin/libdynamo_llm_capi.so"
,
...
...
launch/llmctl/Cargo.toml
deleted
100644 → 0
View file @
480b41d1
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[package]
name
=
"llmctl"
version.workspace
=
true
edition.workspace
=
true
authors.workspace
=
true
license.workspace
=
true
homepage.workspace
=
true
repository.workspace
=
true
[dependencies]
anyhow
=
{
workspace
=
true
}
dynamo-runtime
=
{
workspace
=
true
}
dynamo-llm
=
{
workspace
=
true
}
serde
=
{
workspace
=
true
}
serde_json
=
{
workspace
=
true
}
tracing
=
{
workspace
=
true
}
tokio
=
{
workspace
=
true
}
clap
=
{
version
=
"4.5"
,
features
=
["derive"]
}
tabled
=
{
version
=
"0.18"
}
launch/llmctl/src/main.rs
deleted
100644 → 0
View file @
480b41d1
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use
std
::
sync
::
Arc
;
use
clap
::{
Parser
,
Subcommand
};
use
dynamo_llm
::
discovery
::{
ModelManager
,
ModelWatcher
};
use
dynamo_llm
::
local_model
::{
LocalModelBuilder
,
ModelNetworkName
};
use
dynamo_llm
::
model_type
::
ModelType
;
use
dynamo_runtime
::
component
::
Endpoint
;
use
dynamo_runtime
::
pipeline
::
RouterMode
;
use
dynamo_runtime
::{
distributed
::
DistributedConfig
,
logging
,
DistributedRuntime
,
Result
,
Runtime
,
Worker
,
};
// Macro to define model types and associated commands
macro_rules!
define_type_subcommands
{
(
$
((
$variant:ident
,
$primary_name:expr
,
[
$
(
$alias:expr
),
*
],
$help:expr
)),
*
$
(,)
?
)
=>
{
#[derive(Subcommand)]
enum
AddCommands
{
$
(
#[doc
=
$
help]
#[command(name
=
$
primary_name,
aliases
=
[$
(
$
alias),
*
]
)]
$variant
(
AddModelArgs
),
)
*
}
#[derive(Subcommand)]
enum
ListCommands
{
$
(
#[doc
=
concat
!
(
"List "
,
$
primary_name,
" models"
)]
#[command(name
=
$
primary_name,
aliases
=
[$
(
$
alias),
*
]
)]
$variant
,
)
*
}
#[derive(Subcommand)]
enum
RemoveCommands
{
$
(
#[doc
=
concat
!
(
"Remove "
,
$
primary_name,
" model"
)]
#[command(name
=
$
primary_name,
aliases
=
[$
(
$
alias),
*
]
)]
$variant
(
RemoveModelArgs
),
)
*
}
impl
AddCommands
{
fn
into_parts
(
self
)
->
(
ModelType
,
String
,
String
)
{
match
self
{
$
(
Self
::
$variant
(
args
)
=>
(
ModelType
::
$variant
,
args
.model_name
,
args
.endpoint_name
)),
*
}
}
}
impl
RemoveCommands
{
fn
into_parts
(
self
)
->
(
ModelType
,
String
)
{
match
self
{
$
(
Self
::
$variant
(
args
)
=>
(
ModelType
::
$variant
,
args
.model_name
)),
*
}
}
}
impl
ListCommands
{
fn
model_type
(
&
self
)
->
ModelType
{
match
self
{
$
(
Self
::
$variant
=>
ModelType
::
$variant
),
*
}
}
}
}
}
// Instantiates the Add/List/Remove subcommand enums for every supported model
// type. Tuple layout: (ModelType variant, primary subcommand name, aliases,
// help text shown for the `add` subcommand).
define_type_subcommands!(
    (Chat, "chat", ["chat-model", "chat-models"], "Add a chat model"),
    (
        Completion,
        "completion",
        ["completions", "completion-model"],
        "Add a completion model"
    ),
    // Add new model types here:
    (
        Embedding,
        "embedding",
        ["embeddings", "embedding-model"],
        "Add an embedding model"
    )
);
// Top-level llmctl command line. The `about` string itself marks the tool as
// deprecated. The `///` lines on fields are clap help text and are left as-is.
#[derive(Parser)]
#[command(
    author="NVIDIA",
    version="0.2.1",
    about="LLMCTL - Deprecated. Do not use.",
    long_about = None,
    disable_help_subcommand = true,
)]
struct Cli {
    /// Public Namespace to operate in
    /// Do not use this. In fact don't use anything about this file.
    // When this flag is omitted, `main` substitutes "dynamo".
    #[arg(short = 'n', long)]
    public_namespace: Option<String>,

    // The subcommand tree to execute.
    #[command(subcommand)]
    command: Commands,
}
// Top-level subcommand groups. `Http` is the only group defined.
#[derive(Subcommand)]
enum Commands {
    /// HTTP service related commands
    Http {
        #[command(subcommand)]
        command: HttpCommands,
    },
}
// Operations available under the `http` group. The nested enums
// (`AddCommands`, `ListCommands`, `RemoveCommands`) are generated by the
// `define_type_subcommands!` macro, one variant per model type.
#[derive(Subcommand)]
enum HttpCommands {
    /// Add models
    Add {
        #[command(subcommand)]
        model_type: AddCommands,
    },

    /// List models (all types if no specific type provided)
    List {
        // `None` is forwarded to `list_models`, which then applies no type filter.
        #[command(subcommand)]
        model_type: Option<ListCommands>,
    },

    /// Remove models
    Remove {
        #[command(subcommand)]
        model_type: RemoveCommands,
    },
}
// Arguments carried by every `add <type>` subcommand variant.
// Neither field declares `short` or `long`, so clap should treat both as
// positional arguments — NOTE(review): confirm against the clap version in use.
#[derive(Parser)]
struct AddModelArgs {
    /// Model name (e.g. foo/v1)
    // `add_model` rejects names that start with '/'.
    #[arg(name = "model-name")]
    model_name: String,

    /// Endpoint name (format: component.endpoint or namespace.component.endpoint)
    // Parsed by `endpoint_from_name`, which splits on '.'.
    #[arg(name = "endpoint-name")]
    endpoint_name: String,
}
/// Common fields for removing any model type
// Used as the payload of every `RemoveCommands` variant generated by
// `define_type_subcommands!`.
#[derive(Parser)]
struct RemoveModelArgs {
    /// Name of the model to remove
    #[arg(name = "model-name")]
    model_name: String,
}
fn
main
()
->
Result
<
()
>
{
logging
::
init
();
let
cli
=
Cli
::
parse
();
// Default namespace to "dynamo" if not specified
let
namespace
=
cli
.public_namespace
.unwrap_or_else
(||
"dynamo"
.to_string
());
let
worker
=
Worker
::
from_settings
()
?
;
worker
.execute
(|
runtime
|
async
move
{
handle_command
(
runtime
,
namespace
,
cli
.command
)
.await
})
}
/// Executes one parsed CLI command against a freshly created distributed runtime.
///
/// * `runtime` - runtime handed in by the worker; consumed to build the
///   `DistributedRuntime` with `DistributedConfig::for_cli()`.
/// * `namespace` - namespace passed on to `add_model` / `list_models`.
/// * `command` - the parsed subcommand tree.
///
/// Returns the first error raised by the runtime setup or by the selected
/// operation; `Ok(())` otherwise.
async fn handle_command(runtime: Runtime, namespace: String, command: Commands) -> Result<()> {
    let distributed = DistributedRuntime::new(runtime, DistributedConfig::for_cli()).await?;

    // `Commands` has exactly one variant, so this destructuring cannot fail.
    let Commands::Http {
        command: http_command,
    } = command;

    match http_command {
        HttpCommands::Add { model_type } => {
            let (kind, model_name, endpoint_name) = model_type.into_parts();
            add_model(
                &distributed,
                namespace.to_string(),
                kind,
                model_name,
                &endpoint_name,
            )
            .await?;
        }
        HttpCommands::List { model_type } => {
            // No subcommand given means "list every model type" (filter is None).
            let filter = model_type.map(|selected| selected.model_type());
            list_models(&distributed, namespace.clone(), filter).await?;
        }
        HttpCommands::Remove { model_type } => {
            let (kind, name) = model_type.into_parts();
            remove_model(&distributed, kind, &name).await?;
        }
    }

    Ok(())
}
/// Registers `model_name` as a model of `model_type` served by the given endpoint.
///
/// * `distributed` - runtime used to resolve the endpoint.
/// * `namespace` - namespace in which the endpoint's component is looked up.
/// * `model_type` - kind of model being attached.
/// * `model_name` - public model name; must not begin with '/'.
/// * `endpoint_name` - dotted endpoint path, resolved by `endpoint_from_name`.
///
/// # Errors
/// Fails when the model name starts with a slash, when the endpoint name
/// cannot be resolved, or when building / attaching the local model fails.
async fn add_model(
    distributed: &DistributedRuntime,
    namespace: String,
    model_type: ModelType,
    model_name: String,
    endpoint_name: &str,
) -> Result<()> {
    tracing::debug!("Adding model {model_name} with endpoint {endpoint_name}");

    anyhow::ensure!(
        !model_name.starts_with('/'),
        "Model name '{model_name}' cannot start with a slash"
    );

    let target = endpoint_from_name(distributed, &namespace, endpoint_name)?;

    let mut local_model = LocalModelBuilder::default()
        .model_name(Some(model_name))
        .build()
        .await?;
    local_model.attach(&target, model_type).await?;

    Ok(())
}
// One row of the table printed by `list_models`. Every field is filled from a
// model entry there; the `rename` attributes presumably provide the column
// headers shown in the rendered table — confirm against the tabled docs.
#[derive(tabled::Tabled)]
struct ModelRow {
    // Filled from `entry.model_type.as_str()`.
    #[tabled(rename = "MODEL TYPE")]
    model_type: String,

    // Filled from `entry.name`.
    #[tabled(rename = "MODEL NAME")]
    name: String,

    // Filled from `entry.endpoint.namespace`.
    #[tabled(rename = "NAMESPACE")]
    namespace: String,

    // Filled from `entry.endpoint.component`.
    #[tabled(rename = "COMPONENT")]
    component: String,

    // Filled from `entry.endpoint.name`.
    #[tabled(rename = "ENDPOINT")]
    endpoint: String,
}
async
fn
list_models
(
distributed
:
&
DistributedRuntime
,
namespace
:
String
,
model_type
:
Option
<
ModelType
>
,
)
->
Result
<
()
>
{
// We only need a ModelWatcher to call it's all_entries. llmctl is going away so no need to
// refactor for this.
let
watcher
=
ModelWatcher
::
new
(
distributed
.clone
(),
Arc
::
new
(
ModelManager
::
new
()),
RouterMode
::
Random
,
None
,
);
let
mut
models
=
Vec
::
new
();
for
entry
in
watcher
.all_entries
()
.await
?
{
match
(
model_type
,
entry
.model_type
)
{
(
None
,
_
)
=>
{
// list all
}
(
Some
(
want
),
got
)
if
want
==
got
=>
{
// match
}
_
=>
{
// no match
continue
;
}
}
models
.push
(
ModelRow
{
model_type
:
entry
.model_type
.as_str
()
.to_string
(),
name
:
entry
.name
,
namespace
:
entry
.endpoint.namespace
,
component
:
entry
.endpoint.component
,
endpoint
:
entry
.endpoint.name
,
});
}
if
models
.is_empty
()
{
match
&
model_type
{
Some
(
mt
)
=>
println!
(
"No {} models found in namespace: {}"
,
mt
.as_str
(),
namespace
),
None
=>
println!
(
"No models found in namespace: {}"
,
namespace
),
}
}
else
{
let
table
=
tabled
::
Table
::
new
(
models
);
match
&
model_type
{
Some
(
mt
)
=>
println!
(
"Listing {} models in namespace: {}"
,
mt
.as_str
(),
namespace
),
None
=>
println!
(
"Listing all models in namespace: {}"
,
namespace
),
}
println!
(
"{}"
,
table
);
}
Ok
(())
}
/// Deletes the etcd keys of every registered instance of `model_name` whose
/// type equals `model_type`.
///
/// Per the original author, this manual cleanup is needed because the etcd
/// lease system normally removes these keys on its own.
///
/// # Errors
/// Fails when the runtime has no etcd client, when the entries for the model
/// cannot be fetched, or when a key deletion fails (remaining keys are then
/// left untouched).
async fn remove_model(
    distributed: &DistributedRuntime,
    model_type: ModelType,
    model_name: &str,
) -> Result<()> {
    let watcher = ModelWatcher::new(
        distributed.clone(),
        Arc::new(ModelManager::new()),
        RouterMode::Random,
        None,
    );

    let etcd_client = match distributed.etcd_client() {
        Some(client) => client,
        None => anyhow::bail!("llmctl is only useful with dynamic workers"),
    };

    for entry in watcher.entries_for_model(model_name).await? {
        if entry.model_type != model_type {
            continue;
        }
        // NOTE(review): the meaning of the literal `0` is not visible here — confirm.
        let key = ModelNetworkName::from_entry(&entry, 0);
        tracing::debug!("deleting key: {key}");
        etcd_client.kv_delete(key.to_string(), None).await?;
    }

    Ok(())
}
/// Resolves a dotted endpoint name to an `Endpoint` inside `namespace`.
///
/// `endpoint_name` is split on '.' and must have two or three segments. The
/// last segment is the endpoint and the one before it is the component.
///
/// NOTE(review): for a three-segment name the leading segment is ignored; the
/// `namespace` argument is always what gets used for the lookup.
///
/// # Errors
/// Fails when the name has fewer than two or more than three segments, or
/// when the namespace / component lookup on `distributed` fails.
fn endpoint_from_name(
    distributed: &DistributedRuntime,
    namespace: &str,
    endpoint_name: &str,
) -> anyhow::Result<Endpoint> {
    let segments: Vec<&str> = endpoint_name.split('.').collect();

    // TODO previous version sometimes hardcoded the component to "http", so maybe adjust
    let (component_name, endpoint) = match segments.as_slice() {
        [component, endpoint] | [_, component, endpoint] => {
            ((*component).to_string(), (*endpoint).to_string())
        }
        [] | [_] => anyhow::bail!(
            "Endpoint name '{}' is too short. Format should be 'component.endpoint' or 'namespace.component.endpoint'",
            endpoint_name
        ),
        _ => anyhow::bail!(
            "Endpoint name '{}' is too long. Format should be 'component.endpoint' or 'namespace.component.endpoint'",
            endpoint_name
        ),
    };

    let component = distributed
        .namespace(namespace)?
        .component(component_name)?;

    Ok(component.endpoint(endpoint))
}
pyproject.toml
View file @
ad8ad66b
...
...
@@ -71,8 +71,6 @@ vllm = [
[project.scripts]
dynamo
=
"dynamo.sdk.cli.cli:cli"
dynamo-run
=
"dynamo.sdk.cli.run_executable:dynamo_run"
llmctl
=
"dynamo.sdk.cli.run_executable:llmctl"
http
=
"dynamo.sdk.cli.run_executable:http"
metrics
=
"dynamo.sdk.cli.run_executable:metrics"
mock_worker
=
"dynamo.sdk.cli.run_executable:mock_worker"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment