Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
73c10ae9
Commit
73c10ae9
authored
Feb 20, 2025
by
Biswa Panda
Committed by
GitHub
Feb 20, 2025
Browse files
feat: add cli args for example http service (#221)
Co-authored-by:
Biswa Ranjan Panda
<
biswaranjanp@nvidia.com
>
parent
60a73634
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
39 additions
and
5 deletions
+39
-5
examples/python_rs/llm/vllm/README.md
examples/python_rs/llm/vllm/README.md
+2
-2
examples/rust/Cargo.lock
examples/rust/Cargo.lock
+1
-0
examples/rust/http/Cargo.toml
examples/rust/http/Cargo.toml
+1
-0
examples/rust/http/src/main.rs
examples/rust/http/src/main.rs
+29
-2
llm/rust/triton-llm/src/http/service/service_v2.rs
llm/rust/triton-llm/src/http/service/service_v2.rs
+6
-1
No files found.
examples/python_rs/llm/vllm/README.md
View file @
73c10ae9
...
...
@@ -59,7 +59,7 @@ Run the server logging (with debug level logging):
```
bash
TRD_LOG
=
DEBUG http
```
By default the server will run on port
9992
.
By default the server will run on port
8080
.
Add model to the server:
```
bash
...
...
@@ -116,7 +116,7 @@ The disaggregated deployment utilizes separate GPUs for prefill and decode opera
### 3. Client
```
bash
curl localhost:
9992
/v1/chat/completions
\
curl localhost:
8080
/v1/chat/completions
\
-H
"Content-Type: application/json"
\
-d
'{
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
...
...
examples/rust/Cargo.lock
View file @
73c10ae9
...
...
@@ -1127,6 +1127,7 @@ dependencies = [
name = "http"
version = "0.2.0"
dependencies = [
"clap",
"serde",
"serde_json",
"tokio",
...
...
examples/rust/http/Cargo.toml
View file @
73c10ae9
...
...
@@ -26,6 +26,7 @@ repository.workspace = true
[dependencies]
triton-distributed
=
{
workspace
=
true
}
triton-llm
=
{
workspace
=
true
}
clap
=
{
version
=
"4.5"
,
features
=
["derive"]
}
serde
=
{
workspace
=
true
}
serde_json
=
{
workspace
=
true
}
...
...
examples/rust/http/src/main.rs
View file @
73c10ae9
...
...
@@ -14,6 +14,8 @@
// limitations under the License.
use
std
::
sync
::
Arc
;
use
clap
::
Parser
;
use
std
::
env
;
use
triton_distributed
::{
logging
,
DistributedRuntime
,
Result
,
Runtime
,
Worker
};
use
triton_llm
::
http
::
service
::{
...
...
@@ -21,6 +23,26 @@ use triton_llm::http::service::{
service_v2
::
HttpService
,
};
// Command-line arguments for the example HTTP service, parsed via clap's derive API.
// Plain `//` comment on purpose: clap's derive reads `///` text as help output, and
// the per-field `///` lines below are exactly the strings shown by `--help`.
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// Host for the HTTP service
    #[arg(long, default_value = "0.0.0.0")]
    host: String,

    /// Port number for the HTTP service
    // `default_value_t` takes a typed `u16` value, so the default is type-checked at
    // compile time instead of being parsed from a string when the CLI is built.
    #[arg(short, long, default_value_t = 8080)]
    port: u16,

    /// Namespace for the distributed component
    #[arg(long, default_value = "public")]
    namespace: String,

    /// Component name for the service
    #[arg(long, default_value = "http")]
    component: String,
}
fn
main
()
->
Result
<
()
>
{
logging
::
init
();
let
worker
=
Worker
::
from_settings
()
?
;
...
...
@@ -30,8 +52,13 @@ fn main() -> Result<()> {
async
fn
app
(
runtime
:
Runtime
)
->
Result
<
()
>
{
let
distributed
=
DistributedRuntime
::
from_settings
(
runtime
.clone
())
.await
?
;
let
args
=
Args
::
parse
();
// create the http service and acquire the model manager
let
http_service
=
HttpService
::
builder
()
.port
(
9992
)
.build
()
?
;
let
http_service
=
HttpService
::
builder
()
.port
(
args
.port
)
.host
(
args
.host
)
.build
()
?
;
let
manager
=
http_service
.model_manager
()
.clone
();
// todo - use the IntoComponent trait to register the component
...
...
@@ -42,7 +69,7 @@ async fn app(runtime: Runtime) -> Result<()> {
// written to etcd
// the cli when operating on an `http` component will validate the namespace.component is
// registered with HttpServiceComponentDefinition
let
component
=
distributed
.namespace
(
"public"
)
?
.component
(
"http"
)
?
;
let
component
=
distributed
.namespace
(
&
args
.namespace
)
?
.component
(
&
args
.component
)
?
;
let
etcd_root
=
component
.etcd_path
();
let
etcd_path
=
format!
(
"{}/models/chat/"
,
etcd_root
);
...
...
llm/rust/triton-llm/src/http/service/service_v2.rs
View file @
73c10ae9
...
...
@@ -25,6 +25,7 @@ pub struct HttpService {
models
:
ModelManager
,
router
:
axum
::
Router
,
port
:
u16
,
host
:
String
,
}
#[derive(Clone,
Builder)]
...
...
@@ -33,6 +34,9 @@ pub struct HttpServiceConfig {
#[builder(default
=
"8787"
)]
port
:
u16
,
#[builder(setter(into),
default
=
"String::from(
\"
0.0.0.0
\"
)"
)]
host
:
String
,
// #[builder(default)]
// custom: Vec<axum::Router>
#[builder(default
=
"true"
)]
...
...
@@ -57,7 +61,7 @@ impl HttpService {
}
pub
async
fn
run
(
&
self
,
cancel_token
:
CancellationToken
)
->
Result
<
()
>
{
let
address
=
format!
(
"
0.0.0.0:{}"
,
self
.port
);
let
address
=
format!
(
"
{}:{}"
,
self
.host
,
self
.port
);
tracing
::
info!
(
address
,
"Starting HTTP service on: {address}"
);
let
listener
=
tokio
::
net
::
TcpListener
::
bind
(
address
.as_str
())
...
...
@@ -122,6 +126,7 @@ impl HttpServiceConfigBuilder {
models
:
model_manager
,
router
,
port
:
config
.port
,
host
:
config
.host
,
})
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment