Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
1af7433b
Commit
1af7433b
authored
Mar 05, 2025
by
Neelay Shah
Committed by
GitHub
Mar 05, 2025
Browse files
refactor: rename triton_distributed to dynemo (#22)
Co-authored-by:
Graham King
<
grahamk@nvidia.com
>
parent
ee4ef06b
Changes
165
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
53 additions
and
53 deletions
+53
-53
lib/llm/Cargo.toml
lib/llm/Cargo.toml
+5
-5
lib/llm/src/backend.rs
lib/llm/src/backend.rs
+1
-1
lib/llm/src/engines/llamacpp.rs
lib/llm/src/engines/llamacpp.rs
+5
-5
lib/llm/src/engines/mistralrs.rs
lib/llm/src/engines/mistralrs.rs
+4
-4
lib/llm/src/engines/sglang.rs
lib/llm/src/engines/sglang.rs
+2
-2
lib/llm/src/engines/sglang/engine.rs
lib/llm/src/engines/sglang/engine.rs
+4
-4
lib/llm/src/engines/sglang/worker.rs
lib/llm/src/engines/sglang/worker.rs
+2
-2
lib/llm/src/engines/trtllm.rs
lib/llm/src/engines/trtllm.rs
+1
-1
lib/llm/src/engines/trtllm/executor/engine.rs
lib/llm/src/engines/trtllm/executor/engine.rs
+3
-3
lib/llm/src/engines/vllm.rs
lib/llm/src/engines/vllm.rs
+2
-2
lib/llm/src/engines/vllm/engine.rs
lib/llm/src/engines/vllm/engine.rs
+4
-4
lib/llm/src/engines/vllm/ray.rs
lib/llm/src/engines/vllm/ray.rs
+1
-1
lib/llm/src/engines/vllm/worker.rs
lib/llm/src/engines/vllm/worker.rs
+2
-2
lib/llm/src/http/service/discovery.rs
lib/llm/src/http/service/discovery.rs
+1
-1
lib/llm/src/http/service/openai.rs
lib/llm/src/http/service/openai.rs
+3
-3
lib/llm/src/kv_router.rs
lib/llm/src/kv_router.rs
+3
-3
lib/llm/src/kv_router/publisher.rs
lib/llm/src/kv_router/publisher.rs
+5
-5
lib/llm/src/kv_router/worker.rs
lib/llm/src/kv_router/worker.rs
+2
-2
lib/llm/src/lib.rs
lib/llm/src/lib.rs
+2
-2
lib/llm/src/model_card/model.rs
lib/llm/src/model_card/model.rs
+1
-1
No files found.
lib/llm/Cargo.toml
View file @
1af7433b
...
@@ -18,11 +18,11 @@ version = "0.2.1"
...
@@ -18,11 +18,11 @@ version = "0.2.1"
edition
=
"2021"
edition
=
"2021"
authors
=
["NVIDIA"]
authors
=
["NVIDIA"]
license
=
"Apache-2.0"
license
=
"Apache-2.0"
homepage
=
"https://github.com/
triton-inference-server/triton_distributed
"
homepage
=
"https://github.com/
dynemo-ai/dynemo
"
repository
=
"https://github.com/
triton-inference-server/triton_distributed
"
repository
=
"https://github.com/
dynemo-ai/dynemo.git
"
[package]
[package]
name
=
"
triton-distributed
-llm"
name
=
"
dynemo
-llm"
version.workspace
=
true
version.workspace
=
true
edition.workspace
=
true
edition.workspace
=
true
authors.workspace
=
true
authors.workspace
=
true
...
@@ -43,7 +43,7 @@ vulkan = ["llama-cpp-2/vulkan"]
...
@@ -43,7 +43,7 @@ vulkan = ["llama-cpp-2/vulkan"]
[workspace.dependencies]
[workspace.dependencies]
# local or crates.io
# local or crates.io
triton-distributed
-runtime
=
{
version
=
"0.2.0"
,
path
=
"../runtime"
}
dynemo
-runtime
=
{
version
=
"0.2.0"
,
path
=
"../runtime"
}
# crates.io
# crates.io
anyhow
=
{
version
=
"1"
}
anyhow
=
{
version
=
"1"
}
...
@@ -66,7 +66,7 @@ strum = { version = "0.27", features = ["derive"] }
...
@@ -66,7 +66,7 @@ strum = { version = "0.27", features = ["derive"] }
[dependencies]
[dependencies]
# repo
# repo
triton-distributed
-runtime
=
{
workspace
=
true
}
dynemo
-runtime
=
{
workspace
=
true
}
# workspace
# workspace
anyhow
=
{
workspace
=
true
}
anyhow
=
{
workspace
=
true
}
...
...
lib/llm/src/backend.rs
View file @
1af7433b
...
@@ -34,7 +34,7 @@ use futures::stream::{self, StreamExt};
...
@@ -34,7 +34,7 @@ use futures::stream::{self, StreamExt};
use
tracing
as
log
;
use
tracing
as
log
;
use
crate
::
model_card
::
model
::{
ModelDeploymentCard
,
TokenizerKind
};
use
crate
::
model_card
::
model
::{
ModelDeploymentCard
,
TokenizerKind
};
use
triton_distributed
_runtime
::{
use
dynemo
_runtime
::{
pipeline
::{
pipeline
::{
async_trait
,
AsyncEngineContextProvider
,
ManyOut
,
Operator
,
ResponseStream
,
async_trait
,
AsyncEngineContextProvider
,
ManyOut
,
Operator
,
ResponseStream
,
ServerStreamingEngine
,
SingleIn
,
ServerStreamingEngine
,
SingleIn
,
...
...
lib/llm/src/engines/llamacpp.rs
View file @
1af7433b
...
@@ -22,6 +22,11 @@ use std::{
...
@@ -22,6 +22,11 @@ use std::{
use
anyhow
::
Context
;
use
anyhow
::
Context
;
use
async_stream
::
stream
;
use
async_stream
::
stream
;
use
async_trait
::
async_trait
;
use
async_trait
::
async_trait
;
use
dynemo_runtime
::
engine
::{
AsyncEngine
,
AsyncEngineContextProvider
,
ResponseStream
};
use
dynemo_runtime
::
pipeline
::
error
as
pipeline_error
;
use
dynemo_runtime
::
pipeline
::{
Error
,
ManyOut
,
SingleIn
};
use
dynemo_runtime
::
protocols
::
annotated
::
Annotated
;
use
dynemo_runtime
::
CancellationToken
;
use
llama_cpp_2
::{
use
llama_cpp_2
::{
context
::{
params
::
LlamaContextParams
,
LlamaContext
},
context
::{
params
::
LlamaContextParams
,
LlamaContext
},
llama_backend
::
LlamaBackend
,
llama_backend
::
LlamaBackend
,
...
@@ -30,11 +35,6 @@ use llama_cpp_2::{
...
@@ -30,11 +35,6 @@ use llama_cpp_2::{
sampling
::
LlamaSampler
,
sampling
::
LlamaSampler
,
token
::
LlamaToken
,
token
::
LlamaToken
,
};
};
use
triton_distributed_runtime
::
engine
::{
AsyncEngine
,
AsyncEngineContextProvider
,
ResponseStream
};
use
triton_distributed_runtime
::
pipeline
::
error
as
pipeline_error
;
use
triton_distributed_runtime
::
pipeline
::{
Error
,
ManyOut
,
SingleIn
};
use
triton_distributed_runtime
::
protocols
::
annotated
::
Annotated
;
use
triton_distributed_runtime
::
CancellationToken
;
use
crate
::
backend
::
ExecutionContext
;
use
crate
::
backend
::
ExecutionContext
;
use
crate
::
protocols
::
common
::
llm_backend
::{
BackendInput
,
LLMEngineOutput
};
use
crate
::
protocols
::
common
::
llm_backend
::{
BackendInput
,
LLMEngineOutput
};
...
...
lib/llm/src/engines/mistralrs.rs
View file @
1af7433b
...
@@ -28,10 +28,10 @@ use mistralrs::{
...
@@ -28,10 +28,10 @@ use mistralrs::{
};
};
use
tokio
::
sync
::
mpsc
::
channel
;
use
tokio
::
sync
::
mpsc
::
channel
;
use
triton_distributed
_runtime
::
engine
::{
AsyncEngine
,
AsyncEngineContextProvider
,
ResponseStream
};
use
dynemo
_runtime
::
engine
::{
AsyncEngine
,
AsyncEngineContextProvider
,
ResponseStream
};
use
triton_distributed
_runtime
::
pipeline
::
error
as
pipeline_error
;
use
dynemo
_runtime
::
pipeline
::
error
as
pipeline_error
;
use
triton_distributed
_runtime
::
pipeline
::{
Error
,
ManyOut
,
SingleIn
};
use
dynemo
_runtime
::
pipeline
::{
Error
,
ManyOut
,
SingleIn
};
use
triton_distributed
_runtime
::
protocols
::
annotated
::
Annotated
;
use
dynemo
_runtime
::
protocols
::
annotated
::
Annotated
;
use
crate
::
protocols
::
openai
::
chat_completions
::{
use
crate
::
protocols
::
openai
::
chat_completions
::{
NvCreateChatCompletionRequest
,
NvCreateChatCompletionStreamResponse
,
NvCreateChatCompletionRequest
,
NvCreateChatCompletionStreamResponse
,
...
...
lib/llm/src/engines/sglang.rs
View file @
1af7433b
...
@@ -17,8 +17,8 @@ use std::path::Path;
...
@@ -17,8 +17,8 @@ use std::path::Path;
use
std
::
sync
::
Arc
;
use
std
::
sync
::
Arc
;
use
crate
::
backend
::
ExecutionContext
;
use
crate
::
backend
::
ExecutionContext
;
use
triton_distributed
_runtime
::
pipeline
::
error
as
pipeline_error
;
use
dynemo
_runtime
::
pipeline
::
error
as
pipeline_error
;
use
triton_distributed
_runtime
::
CancellationToken
;
use
dynemo
_runtime
::
CancellationToken
;
mod
worker
;
mod
worker
;
...
...
lib/llm/src/engines/sglang/engine.rs
View file @
1af7433b
...
@@ -19,10 +19,10 @@ use async_stream::stream;
...
@@ -19,10 +19,10 @@ use async_stream::stream;
use
async_trait
::
async_trait
;
use
async_trait
::
async_trait
;
use
crate
::
protocols
::
common
::
llm_backend
::{
BackendInput
,
LLMEngineOutput
};
use
crate
::
protocols
::
common
::
llm_backend
::{
BackendInput
,
LLMEngineOutput
};
use
triton_distributed
_runtime
::
engine
::{
AsyncEngine
,
AsyncEngineContextProvider
,
ResponseStream
};
use
dynemo
_runtime
::
engine
::{
AsyncEngine
,
AsyncEngineContextProvider
,
ResponseStream
};
use
triton_distributed
_runtime
::
pipeline
::{
Error
,
ManyOut
,
SingleIn
};
use
dynemo
_runtime
::
pipeline
::{
Error
,
ManyOut
,
SingleIn
};
use
triton_distributed
_runtime
::
protocols
::
annotated
::
Annotated
;
use
dynemo
_runtime
::
protocols
::
annotated
::
Annotated
;
use
triton_distributed
_runtime
::
runtime
::
CancellationToken
;
use
dynemo
_runtime
::
runtime
::
CancellationToken
;
use
crate
::
engines
::
MultiNodeConfig
;
use
crate
::
engines
::
MultiNodeConfig
;
...
...
lib/llm/src/engines/sglang/worker.rs
View file @
1af7433b
...
@@ -37,8 +37,8 @@ use tokio::sync::mpsc::Sender;
...
@@ -37,8 +37,8 @@ use tokio::sync::mpsc::Sender;
use
tokio
::{
io
::
AsyncBufReadExt
,
sync
::
mpsc
::
error
::
SendError
};
use
tokio
::{
io
::
AsyncBufReadExt
,
sync
::
mpsc
::
error
::
SendError
};
use
tokio
::{
io
::
AsyncReadExt
as
_
,
task
::
JoinHandle
};
use
tokio
::{
io
::
AsyncReadExt
as
_
,
task
::
JoinHandle
};
use
triton_distributed
_runtime
::
protocols
::
annotated
::
Annotated
;
use
dynemo
_runtime
::
protocols
::
annotated
::
Annotated
;
use
triton_distributed
_runtime
::
runtime
::
CancellationToken
;
use
dynemo
_runtime
::
runtime
::
CancellationToken
;
use
crate
::
engines
::
sglang
::
MultiGPUConfig
;
use
crate
::
engines
::
sglang
::
MultiGPUConfig
;
use
crate
::
engines
::
MultiNodeConfig
;
use
crate
::
engines
::
MultiNodeConfig
;
...
...
lib/llm/src/engines/trtllm.rs
View file @
1af7433b
...
@@ -16,7 +16,7 @@
...
@@ -16,7 +16,7 @@
use
std
::
sync
::
Arc
;
use
std
::
sync
::
Arc
;
use
crate
::
backend
::
ExecutionContext
;
use
crate
::
backend
::
ExecutionContext
;
use
triton_distributed
_runtime
::
pipeline
::
error
as
pipeline_error
;
use
dynemo
_runtime
::
pipeline
::
error
as
pipeline_error
;
pub
mod
executor
;
pub
mod
executor
;
...
...
lib/llm/src/engines/trtllm/executor/engine.rs
View file @
1af7433b
...
@@ -15,12 +15,12 @@
...
@@ -15,12 +15,12 @@
use
anyhow
::{
Error
,
Result
};
use
anyhow
::{
Error
,
Result
};
use
async_trait
::
async_trait
;
use
async_trait
::
async_trait
;
use
dynemo_runtime
::
engine
::{
AsyncEngine
,
AsyncEngineContextProvider
,
ResponseStream
};
use
dynemo_runtime
::
pipeline
::{
ManyOut
,
SingleIn
};
use
dynemo_runtime
::
protocols
::
annotated
::
Annotated
;
use
futures
::
stream
;
use
futures
::
stream
;
use
tokio
::
sync
::
mpsc
;
use
tokio
::
sync
::
mpsc
;
use
tokio_util
::
sync
::
CancellationToken
;
use
tokio_util
::
sync
::
CancellationToken
;
use
triton_distributed_runtime
::
engine
::{
AsyncEngine
,
AsyncEngineContextProvider
,
ResponseStream
};
use
triton_distributed_runtime
::
pipeline
::{
ManyOut
,
SingleIn
};
use
triton_distributed_runtime
::
protocols
::
annotated
::
Annotated
;
use
super
::
Executor
;
use
super
::
Executor
;
use
crate
::
protocols
::
common
::
llm_backend
::{
BackendInput
,
LLMEngineOutput
};
use
crate
::
protocols
::
common
::
llm_backend
::{
BackendInput
,
LLMEngineOutput
};
...
...
lib/llm/src/engines/vllm.rs
View file @
1af7433b
...
@@ -19,8 +19,8 @@ use std::pin::Pin;
...
@@ -19,8 +19,8 @@ use std::pin::Pin;
use
std
::
sync
::
Arc
;
use
std
::
sync
::
Arc
;
use
std
::
task
::{
Context
,
Poll
};
use
std
::
task
::{
Context
,
Poll
};
use
triton_distributed
_runtime
::
pipeline
::
error
as
pipeline_error
;
use
dynemo
_runtime
::
pipeline
::
error
as
pipeline_error
;
use
triton_distributed
_runtime
::
CancellationToken
;
use
dynemo
_runtime
::
CancellationToken
;
use
crate
::
backend
::
ExecutionContext
;
use
crate
::
backend
::
ExecutionContext
;
use
crate
::
engines
::
MultiNodeConfig
;
use
crate
::
engines
::
MultiNodeConfig
;
...
...
lib/llm/src/engines/vllm/engine.rs
View file @
1af7433b
...
@@ -21,10 +21,10 @@ use async_trait::async_trait;
...
@@ -21,10 +21,10 @@ use async_trait::async_trait;
use
crate
::
engines
::
vllm
::
worker
;
use
crate
::
engines
::
vllm
::
worker
;
use
crate
::
engines
::
MultiNodeConfig
;
use
crate
::
engines
::
MultiNodeConfig
;
use
crate
::
protocols
::
common
::
llm_backend
::{
BackendInput
,
LLMEngineOutput
};
use
crate
::
protocols
::
common
::
llm_backend
::{
BackendInput
,
LLMEngineOutput
};
use
triton_distributed
_runtime
::
engine
::{
AsyncEngine
,
AsyncEngineContextProvider
,
ResponseStream
};
use
dynemo
_runtime
::
engine
::{
AsyncEngine
,
AsyncEngineContextProvider
,
ResponseStream
};
use
triton_distributed
_runtime
::
pipeline
::{
Error
,
ManyOut
,
SingleIn
};
use
dynemo
_runtime
::
pipeline
::{
Error
,
ManyOut
,
SingleIn
};
use
triton_distributed
_runtime
::
protocols
::
annotated
::
Annotated
;
use
dynemo
_runtime
::
protocols
::
annotated
::
Annotated
;
use
triton_distributed
_runtime
::
runtime
::
CancellationToken
;
use
dynemo
_runtime
::
runtime
::
CancellationToken
;
pub
struct
VllmEngine
{
pub
struct
VllmEngine
{
cancel_token
:
CancellationToken
,
cancel_token
:
CancellationToken
,
...
...
lib/llm/src/engines/vllm/ray.rs
View file @
1af7433b
...
@@ -24,7 +24,7 @@ use tokio::select;
...
@@ -24,7 +24,7 @@ use tokio::select;
use
tokio
::
time
;
use
tokio
::
time
;
use
tracing
;
use
tracing
;
use
triton_distributed
_runtime
::
CancellationToken
;
use
dynemo
_runtime
::
CancellationToken
;
/// Default is 16 seconds, we make it a bit shorter
/// Default is 16 seconds, we make it a bit shorter
const
RAY_STOP_TIMEOUT_SECS
:
u32
=
10
;
const
RAY_STOP_TIMEOUT_SECS
:
u32
=
10
;
...
...
lib/llm/src/engines/vllm/worker.rs
View file @
1af7433b
...
@@ -19,6 +19,8 @@ use std::{
...
@@ -19,6 +19,8 @@ use std::{
};
};
use
async_zmq
::{
SinkExt
,
StreamExt
};
use
async_zmq
::{
SinkExt
,
StreamExt
};
use
dynemo_runtime
::
protocols
::
annotated
::
Annotated
;
use
dynemo_runtime
::
CancellationToken
;
use
pyo3
::{
use
pyo3
::{
prelude
::
*
,
prelude
::
*
,
types
::{
IntoPyDict
,
PyBytes
,
PyString
},
types
::{
IntoPyDict
,
PyBytes
,
PyString
},
...
@@ -26,8 +28,6 @@ use pyo3::{
...
@@ -26,8 +28,6 @@ use pyo3::{
use
tokio
::
sync
::
mpsc
::
Sender
;
use
tokio
::
sync
::
mpsc
::
Sender
;
use
tokio
::
task
::
JoinHandle
;
use
tokio
::
task
::
JoinHandle
;
use
tokio
::{
io
::
AsyncBufReadExt
,
sync
::
mpsc
::
error
::
SendError
};
use
tokio
::{
io
::
AsyncBufReadExt
,
sync
::
mpsc
::
error
::
SendError
};
use
triton_distributed_runtime
::
protocols
::
annotated
::
Annotated
;
use
triton_distributed_runtime
::
CancellationToken
;
use
crate
::
engines
::
MultiNodeConfig
;
use
crate
::
engines
::
MultiNodeConfig
;
use
crate
::
protocols
::
common
::
llm_backend
::
LLMEngineOutput
;
use
crate
::
protocols
::
common
::
llm_backend
::
LLMEngineOutput
;
...
...
lib/llm/src/http/service/discovery.rs
View file @
1af7433b
...
@@ -18,7 +18,7 @@ use std::sync::Arc;
...
@@ -18,7 +18,7 @@ use std::sync::Arc;
use
serde
::{
Deserialize
,
Serialize
};
use
serde
::{
Deserialize
,
Serialize
};
use
tokio
::
sync
::
mpsc
::
Receiver
;
use
tokio
::
sync
::
mpsc
::
Receiver
;
use
triton_distributed
_runtime
::{
use
dynemo
_runtime
::{
protocols
::{
self
,
annotated
::
Annotated
},
protocols
::{
self
,
annotated
::
Annotated
},
raise
,
raise
,
transports
::
etcd
::{
KeyValue
,
WatchEvent
},
transports
::
etcd
::{
KeyValue
,
WatchEvent
},
...
...
lib/llm/src/http/service/openai.rs
View file @
1af7433b
...
@@ -48,7 +48,7 @@ use crate::types::{
...
@@ -48,7 +48,7 @@ use crate::types::{
Annotated
,
Annotated
,
};
};
use
triton_distributed
_runtime
::
pipeline
::{
AsyncEngineContext
,
Context
};
use
dynemo
_runtime
::
pipeline
::{
AsyncEngineContext
,
Context
};
#[derive(Serialize,
Deserialize)]
#[derive(Serialize,
Deserialize)]
pub
(
crate
)
struct
ErrorResponse
{
pub
(
crate
)
struct
ErrorResponse
{
...
@@ -91,7 +91,7 @@ impl ErrorResponse {
...
@@ -91,7 +91,7 @@ impl ErrorResponse {
)
)
}
}
/// The OAI endpoints call an [`
triton_distributed
_runtime::engine::AsyncEngine`] which are specialized to return
/// The OAI endpoints call an [`
dynemo
_runtime::engine::AsyncEngine`] which are specialized to return
/// an [`anyhow::Error`]. This method will convert the [`anyhow::Error`] into an [`HttpError`].
/// an [`anyhow::Error`]. This method will convert the [`anyhow::Error`] into an [`HttpError`].
/// If successful, it will return the [`HttpError`] as an [`ErrorResponse::internal_server_error`]
/// If successful, it will return the [`HttpError`] as an [`ErrorResponse::internal_server_error`]
/// with the details of the error.
/// with the details of the error.
...
@@ -516,7 +516,7 @@ pub fn list_models_router(
...
@@ -516,7 +516,7 @@ pub fn list_models_router(
path
:
Option
<
String
>
,
path
:
Option
<
String
>
,
)
->
(
Vec
<
RouteDoc
>
,
Router
)
{
)
->
(
Vec
<
RouteDoc
>
,
Router
)
{
// TODO: Why do we have this endpoint?
// TODO: Why do we have this endpoint?
let
custom_path
=
path
.unwrap_or
(
"/
triton
/alpha/list-models"
.to_string
());
let
custom_path
=
path
.unwrap_or
(
"/
dynemo
/alpha/list-models"
.to_string
());
let
doc_for_custom
=
RouteDoc
::
new
(
axum
::
http
::
Method
::
GET
,
&
custom_path
);
let
doc_for_custom
=
RouteDoc
::
new
(
axum
::
http
::
Method
::
GET
,
&
custom_path
);
// Standard OpenAI compatible list models endpoint
// Standard OpenAI compatible list models endpoint
...
...
lib/llm/src/kv_router.rs
View file @
1af7433b
...
@@ -14,11 +14,11 @@
...
@@ -14,11 +14,11 @@
// limitations under the License.
// limitations under the License.
use
anyhow
::
Result
;
use
anyhow
::
Result
;
use
dynemo_runtime
::{
component
::
Component
,
DistributedRuntime
};
use
futures
::
stream
::
StreamExt
;
use
futures
::
stream
::
StreamExt
;
use
std
::{
sync
::
Arc
,
time
::
Duration
};
use
std
::{
sync
::
Arc
,
time
::
Duration
};
use
tokio_util
::
sync
::
CancellationToken
;
use
tokio_util
::
sync
::
CancellationToken
;
use
tracing
;
use
tracing
;
use
triton_distributed_runtime
::{
component
::
Component
,
DistributedRuntime
};
pub
mod
indexer
;
pub
mod
indexer
;
pub
mod
protocols
;
pub
mod
protocols
;
...
@@ -62,7 +62,7 @@ impl KvRouter {
...
@@ -62,7 +62,7 @@ impl KvRouter {
}
}
pub
async
fn
new
(
pub
async
fn
new
(
nats_client
:
triton_distributed
_runtime
::
transports
::
nats
::
Client
,
nats_client
:
dynemo
_runtime
::
transports
::
nats
::
Client
,
service_name
:
String
,
service_name
:
String
,
kv_subject
:
String
,
kv_subject
:
String
,
)
->
Result
<
Arc
<
Self
>>
{
)
->
Result
<
Arc
<
Self
>>
{
...
@@ -135,7 +135,7 @@ impl KvRouter {
...
@@ -135,7 +135,7 @@ impl KvRouter {
}
}
async
fn
collect_endpoints
(
async
fn
collect_endpoints
(
nats_client
:
triton_distributed
_runtime
::
transports
::
nats
::
Client
,
nats_client
:
dynemo
_runtime
::
transports
::
nats
::
Client
,
service_name
:
String
,
service_name
:
String
,
ep_tx
:
tokio
::
sync
::
mpsc
::
Sender
<
ProcessedEndpoints
>
,
ep_tx
:
tokio
::
sync
::
mpsc
::
Sender
<
ProcessedEndpoints
>
,
cancel
:
CancellationToken
,
cancel
:
CancellationToken
,
...
...
lib/llm/src/kv_router/publisher.rs
View file @
1af7433b
...
@@ -15,11 +15,7 @@
...
@@ -15,11 +15,7 @@
use
crate
::
kv_router
::{
indexer
::
RouterEvent
,
protocols
::
*
,
KV_EVENT_SUBJECT
};
use
crate
::
kv_router
::{
indexer
::
RouterEvent
,
protocols
::
*
,
KV_EVENT_SUBJECT
};
use
async_trait
::
async_trait
;
use
async_trait
::
async_trait
;
use
futures
::
stream
;
use
dynemo_runtime
::{
use
std
::
sync
::
Arc
;
use
tokio
::
sync
::
mpsc
;
use
tracing
as
log
;
use
triton_distributed_runtime
::{
component
::
Component
,
component
::
Component
,
pipeline
::{
pipeline
::{
network
::
Ingress
,
AsyncEngine
,
AsyncEngineContextProvider
,
ManyOut
,
ResponseStream
,
network
::
Ingress
,
AsyncEngine
,
AsyncEngineContextProvider
,
ManyOut
,
ResponseStream
,
...
@@ -28,6 +24,10 @@ use triton_distributed_runtime::{
...
@@ -28,6 +24,10 @@ use triton_distributed_runtime::{
protocols
::
annotated
::
Annotated
,
protocols
::
annotated
::
Annotated
,
DistributedRuntime
,
Error
,
Result
,
DistributedRuntime
,
Error
,
Result
,
};
};
use
futures
::
stream
;
use
std
::
sync
::
Arc
;
use
tokio
::
sync
::
mpsc
;
use
tracing
as
log
;
pub
struct
KvEventPublisher
{
pub
struct
KvEventPublisher
{
tx
:
mpsc
::
UnboundedSender
<
KvCacheEvent
>
,
tx
:
mpsc
::
UnboundedSender
<
KvCacheEvent
>
,
...
...
lib/llm/src/kv_router/worker.rs
View file @
1af7433b
...
@@ -19,12 +19,12 @@ pub use crate::kv_router::protocols::ForwardPassMetrics;
...
@@ -19,12 +19,12 @@ pub use crate::kv_router::protocols::ForwardPassMetrics;
use
anyhow
::
Result
;
use
anyhow
::
Result
;
use
derive_builder
::
Builder
;
use
derive_builder
::
Builder
;
use
triton_distributed
_runtime
::
pipeline
::
network
::{
use
dynemo
_runtime
::
pipeline
::
network
::{
ingress
::
push_endpoint
::
PushEndpoint
,
ingress
::
push_endpoint
::
PushEndpoint
,
PushWorkHandler
,
PushWorkHandler
,
};
};
use
triton_distributed
_runtime
::
transports
::
nats
::{
self
,
ServiceExt
};
use
dynemo
_runtime
::
transports
::
nats
::{
self
,
ServiceExt
};
use
tokio
::
sync
::
watch
;
use
tokio
::
sync
::
watch
;
use
tokio_util
::
sync
::
CancellationToken
;
use
tokio_util
::
sync
::
CancellationToken
;
...
...
lib/llm/src/lib.rs
View file @
1af7433b
...
@@ -13,9 +13,9 @@
...
@@ -13,9 +13,9 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
//! #
Triton
LLM
//! #
Dynemo
LLM
//!
//!
//! The `
triton
-llm` crate is a Rust library that provides a set of traits and types for building
//! The `
dynemo
-llm` crate is a Rust library that provides a set of traits and types for building
//! distributed LLM inference solutions.
//! distributed LLM inference solutions.
pub
mod
backend
;
pub
mod
backend
;
...
...
lib/llm/src/model_card/model.rs
View file @
1af7433b
...
@@ -37,7 +37,7 @@ use std::time::Duration;
...
@@ -37,7 +37,7 @@ use std::time::Duration;
use
derive_builder
::
Builder
;
use
derive_builder
::
Builder
;
use
triton_distributed
_runtime
::
slug
::
Slug
;
use
dynemo
_runtime
::
slug
::
Slug
;
pub
const
BUCKET_NAME
:
&
str
=
"mdc"
;
pub
const
BUCKET_NAME
:
&
str
=
"mdc"
;
...
...
Prev
1
2
3
4
5
6
7
8
9
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment