Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
1af7433b
Commit
1af7433b
authored
Mar 05, 2025
by
Neelay Shah
Committed by
GitHub
Mar 05, 2025
Browse files
refactor: rename triton_distributed to dynemo (#22)
Co-authored-by:
Graham King
<
grahamk@nvidia.com
>
parent
ee4ef06b
Changes
165
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
148 additions
and
155 deletions
+148
-155
lib/bindings/python/examples/hello_world/client.py
lib/bindings/python/examples/hello_world/client.py
+3
-3
lib/bindings/python/examples/hello_world/run.py
lib/bindings/python/examples/hello_world/run.py
+2
-2
lib/bindings/python/examples/hello_world/server.py
lib/bindings/python/examples/hello_world/server.py
+3
-3
lib/bindings/python/examples/pipeline/backend.py
lib/bindings/python/examples/pipeline/backend.py
+2
-2
lib/bindings/python/examples/pipeline/frontend.py
lib/bindings/python/examples/pipeline/frontend.py
+2
-2
lib/bindings/python/examples/pipeline/middle.py
lib/bindings/python/examples/pipeline/middle.py
+2
-2
lib/bindings/python/examples/pipeline/pipeline.py
lib/bindings/python/examples/pipeline/pipeline.py
+2
-2
lib/bindings/python/examples/typed/client.py
lib/bindings/python/examples/typed/client.py
+3
-5
lib/bindings/python/examples/typed/server.py
lib/bindings/python/examples/typed/server.py
+4
-8
lib/bindings/python/rust/engine.rs
lib/bindings/python/rust/engine.rs
+2
-3
lib/bindings/python/rust/lib.rs
lib/bindings/python/rust/lib.rs
+2
-2
lib/bindings/python/rust/llm/backend.rs
lib/bindings/python/rust/llm/backend.rs
+1
-1
lib/bindings/python/rust/llm/preprocessor.rs
lib/bindings/python/rust/llm/preprocessor.rs
+2
-2
lib/bindings/python/src/dynemo/_core.pyi
lib/bindings/python/src/dynemo/_core.pyi
+1
-1
lib/bindings/python/src/dynemo/llm/__init__.py
lib/bindings/python/src/dynemo/llm/__init__.py
+2
-2
lib/bindings/python/src/dynemo/runtime/__init__.py
lib/bindings/python/src/dynemo/runtime/__init__.py
+8
-8
lib/bindings/python/tests/soak.py
lib/bindings/python/tests/soak.py
+2
-2
lib/bindings/python/tests/test_bindings_install.py
lib/bindings/python/tests/test_bindings_install.py
+1
-1
lib/bindings/python/tests/test_etcd_bindings.py
lib/bindings/python/tests/test_etcd_bindings.py
+1
-1
lib/llm/Cargo.lock
lib/llm/Cargo.lock
+103
-103
No files found.
lib/bindings/python/examples/hello_world/client.py
View file @
1af7433b
...
...
@@ -17,12 +17,12 @@ import asyncio
import
uvloop
from
triton_distributed
.runtime
import
DistributedRuntime
,
triton
_worker
from
dynemo
.runtime
import
DistributedRuntime
,
dynemo
_worker
@
triton
_worker
()
@
dynemo
_worker
()
async
def
worker
(
runtime
:
DistributedRuntime
):
await
init
(
runtime
,
"
triton-init
"
)
await
init
(
runtime
,
"
dynemo
"
)
async
def
init
(
runtime
:
DistributedRuntime
,
ns
:
str
):
...
...
lib/bindings/python/examples/hello_world/run.py
View file @
1af7433b
...
...
@@ -21,7 +21,7 @@ import uvloop
from
client
import
init
as
client_init
from
server
import
init
as
server_init
from
triton_distributed
.runtime
import
DistributedRuntime
,
triton
_worker
from
dynemo
.runtime
import
DistributedRuntime
,
dynemo
_worker
def
random_string
(
length
=
10
):
...
...
@@ -29,7 +29,7 @@ def random_string(length=10):
return
""
.
join
(
random
.
choices
(
chars
,
k
=
length
))
@
triton
_worker
()
@
dynemo
_worker
()
async
def
worker
(
runtime
:
DistributedRuntime
):
ns
=
random_string
()
task
=
asyncio
.
create_task
(
server_init
(
runtime
,
ns
))
...
...
lib/bindings/python/examples/hello_world/server.py
View file @
1af7433b
...
...
@@ -17,7 +17,7 @@ import asyncio
import
uvloop
from
triton_distributed
.runtime
import
DistributedRuntime
,
triton
_worker
from
dynemo
.runtime
import
DistributedRuntime
,
dynemo
_worker
class
RequestHandler
:
...
...
@@ -31,9 +31,9 @@ class RequestHandler:
yield
char
@
triton
_worker
()
@
dynemo
_worker
()
async
def
worker
(
runtime
:
DistributedRuntime
):
await
init
(
runtime
,
"
triton-init
"
)
await
init
(
runtime
,
"
dynemo
"
)
async
def
init
(
runtime
:
DistributedRuntime
,
ns
:
str
):
...
...
lib/bindings/python/examples/pipeline/backend.py
View file @
1af7433b
...
...
@@ -17,7 +17,7 @@ import asyncio
import
uvloop
from
triton_distributed
.runtime
import
DistributedRuntime
,
triton
_worker
from
dynemo
.runtime
import
DistributedRuntime
,
dynemo
_worker
uvloop
.
install
()
...
...
@@ -29,7 +29,7 @@ class RequestHandler:
yield
char
@
triton
_worker
()
@
dynemo
_worker
()
async
def
worker
(
runtime
:
DistributedRuntime
):
component
=
runtime
.
namespace
(
"examples/pipeline"
).
component
(
"backend"
)
await
component
.
create_service
()
...
...
lib/bindings/python/examples/pipeline/frontend.py
View file @
1af7433b
...
...
@@ -17,7 +17,7 @@ import asyncio
import
uvloop
from
triton_distributed
.runtime
import
DistributedRuntime
,
triton
_worker
from
dynemo
.runtime
import
DistributedRuntime
,
dynemo
_worker
uvloop
.
install
()
...
...
@@ -32,7 +32,7 @@ class RequestHandler:
yield
output
.
get
(
"data"
)
@
triton
_worker
()
@
dynemo
_worker
()
async
def
worker
(
runtime
:
DistributedRuntime
):
# client to the next component - in this case the middle component
next
=
(
...
...
lib/bindings/python/examples/pipeline/middle.py
View file @
1af7433b
...
...
@@ -17,7 +17,7 @@ import asyncio
import
uvloop
from
triton_distributed
.runtime
import
DistributedRuntime
,
triton
_worker
from
dynemo
.runtime
import
DistributedRuntime
,
dynemo
_worker
uvloop
.
install
()
...
...
@@ -32,7 +32,7 @@ class RequestHandler:
yield
output
.
get
(
"data"
)
@
triton
_worker
()
@
dynemo
_worker
()
async
def
worker
(
runtime
:
DistributedRuntime
):
# client to backend
backend
=
(
...
...
lib/bindings/python/examples/pipeline/pipeline.py
View file @
1af7433b
...
...
@@ -17,12 +17,12 @@ import asyncio
import
uvloop
from
triton_distributed
.runtime
import
DistributedRuntime
,
triton
_worker
from
dynemo
.runtime
import
DistributedRuntime
,
dynemo
_worker
uvloop
.
install
()
@
triton
_worker
()
@
dynemo
_worker
()
async
def
worker
(
runtime
:
DistributedRuntime
):
"""
# Pipeline Example
...
...
lib/bindings/python/examples/typed/client.py
View file @
1af7433b
...
...
@@ -17,18 +17,16 @@ import asyncio
from
protocol
import
Request
from
triton_distributed
.runtime
import
DistributedRuntime
,
triton
_worker
from
dynemo
.runtime
import
DistributedRuntime
,
dynemo
_worker
@
triton
_worker
()
@
dynemo
_worker
()
async
def
worker
(
runtime
:
DistributedRuntime
):
"""
Instantiate a `backend` client and call the `generate` endpoint
"""
# get endpoint
endpoint
=
(
runtime
.
namespace
(
"triton-init"
).
component
(
"backend"
).
endpoint
(
"generate"
)
)
endpoint
=
runtime
.
namespace
(
"dynemo"
).
component
(
"backend"
).
endpoint
(
"generate"
)
# create client
client
=
await
endpoint
.
client
()
...
...
lib/bindings/python/examples/typed/server.py
View file @
1af7433b
...
...
@@ -19,11 +19,7 @@ import asyncio
import
uvloop
from
protocol
import
Request
,
Response
from
triton_distributed.runtime
import
(
DistributedRuntime
,
triton_endpoint
,
triton_worker
,
)
from
dynemo.runtime
import
DistributedRuntime
,
dynemo_endpoint
,
dynemo_worker
uvloop
.
install
()
...
...
@@ -33,19 +29,19 @@ class RequestHandler:
Request handler for the generate endpoint
"""
@
triton
_endpoint
(
Request
,
Response
)
@
dynemo
_endpoint
(
Request
,
Response
)
async
def
generate
(
self
,
request
):
for
char
in
request
.
data
:
yield
char
@
triton
_worker
()
@
dynemo
_worker
()
async
def
worker
(
runtime
:
DistributedRuntime
):
"""
Instantiate a `backend` component and serve the `generate` endpoint
A `Component` can serve multiple endpoints
"""
component
=
runtime
.
namespace
(
"
triton-init
"
).
component
(
"backend"
)
component
=
runtime
.
namespace
(
"
dynemo
"
).
component
(
"backend"
)
await
component
.
create_service
()
endpoint
=
component
.
endpoint
(
"generate"
)
...
...
lib/bindings/python/rust/engine.rs
View file @
1af7433b
...
...
@@ -15,8 +15,7 @@
use
std
::
sync
::
Arc
;
pub
use
serde
::{
Deserialize
,
Serialize
};
pub
use
triton_distributed_runtime
::{
pub
use
dynemo_runtime
::{
error
,
pipeline
::{
async_trait
,
AsyncEngine
,
AsyncEngineContextProvider
,
Data
,
ManyOut
,
ResponseStream
,
...
...
@@ -25,6 +24,7 @@ pub use triton_distributed_runtime::{
protocols
::
annotated
::
Annotated
,
Error
,
Result
,
};
pub
use
serde
::{
Deserialize
,
Serialize
};
use
pyo3
::
prelude
::
*
;
use
pyo3_async_runtimes
::
TaskLocals
;
...
...
@@ -192,7 +192,6 @@ where
// tell the python async generator to stop generating
// right now, this is impossible as we are not passing the context to the python async generator
// todo: add task-local context to the python async generator
// see: https://github.com/triton-inference-server/triton_distributed/issues/130
ctx
.stop_generating
();
let
msg
=
format!
(
"critical error: invalid response object from python async generator; application-logic-mismatch: {}"
,
e
);
tracing
::
error!
(
request_id
,
"{}"
,
msg
);
...
...
lib/bindings/python/rust/lib.rs
View file @
1af7433b
...
...
@@ -25,14 +25,14 @@ use std::{fmt::Display, sync::Arc};
use
tokio
::
sync
::
Mutex
;
use
tracing_subscriber
::
FmtSubscriber
;
use
triton_distributed
_runtime
::{
use
dynemo
_runtime
::{
self
as
rs
,
pipeline
::{
EngineStream
,
ManyOut
,
SingleIn
},
protocols
::
annotated
::
Annotated
as
RsAnnotated
,
traits
::
DistributedRuntimeProvider
,
};
use
triton_distributed
_llm
::{
self
as
llm_rs
};
use
dynemo
_llm
::{
self
as
llm_rs
};
mod
engine
;
mod
llm
;
...
...
lib/bindings/python/rust/llm/backend.rs
View file @
1af7433b
...
...
@@ -19,7 +19,7 @@ use crate::llm::model_card::ModelDeploymentCard;
use
llm_rs
::
protocols
::
common
::
llm_backend
::{
BackendInput
,
BackendOutput
};
use
llm_rs
::
types
::
Annotated
;
use
triton_distributed
_runtime
::
pipeline
::{
Operator
,
ServiceBackend
,
ServiceFrontend
,
Source
};
use
dynemo
_runtime
::
pipeline
::{
Operator
,
ServiceBackend
,
ServiceFrontend
,
Source
};
use
crate
::
engine
::
PythonAsyncEngine
;
...
...
lib/bindings/python/rust/llm/preprocessor.rs
View file @
1af7433b
...
...
@@ -27,9 +27,9 @@ use llm_rs::{
},
};
use
triton_distributed
_runtime
::
pipeline
::{
Operator
,
ServiceFrontend
,
Source
};
use
dynemo
_runtime
::
pipeline
::{
Operator
,
ServiceFrontend
,
Source
};
use
triton_distributed
_runtime
::
pipeline
::{
ManyOut
,
SegmentSink
,
SingleIn
};
use
dynemo
_runtime
::
pipeline
::{
ManyOut
,
SegmentSink
,
SingleIn
};
#[pyclass]
pub
(
crate
)
struct
OAIChatPreprocessor
{
...
...
lib/bindings/python/src/
triton_distributed
/_core.pyi
→
lib/bindings/python/src/
dynemo
/_core.pyi
View file @
1af7433b
...
...
@@ -26,7 +26,7 @@ RequestHandler = Callable[[JsonLike], AsyncGenerator[JsonLike, None]]
class DistributedRuntime:
"""
The runtime object for
a distributed NOVA
applications
The runtime object for
dynemo
applications
"""
...
...
...
lib/bindings/python/src/
triton_distributed
/llm/__init__.py
→
lib/bindings/python/src/
dynemo
/llm/__init__.py
View file @
1af7433b
...
...
@@ -13,5 +13,5 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from
triton_distributed
._core
import
KvMetricsPublisher
as
KvMetricsPublisher
from
triton_distributed
._core
import
KvRouter
as
KvRouter
from
dynemo
._core
import
KvMetricsPublisher
as
KvMetricsPublisher
from
dynemo
._core
import
KvRouter
as
KvRouter
lib/bindings/python/src/
triton_distributed
/runtime/__init__.py
→
lib/bindings/python/src/
dynemo
/runtime/__init__.py
View file @
1af7433b
...
...
@@ -22,15 +22,15 @@ from pydantic import BaseModel, ValidationError
# List all the classes in the _core module for re-export
# import * causes "unable to detect undefined names"
from
triton_distributed
._core
import
Backend
as
Backend
from
triton_distributed
._core
import
Client
as
Client
from
triton_distributed
._core
import
DistributedRuntime
as
DistributedRuntime
from
triton_distributed
._core
import
KvRouter
as
KvRouter
from
triton_distributed
._core
import
ModelDeploymentCard
as
ModelDeploymentCard
from
triton_distributed
._core
import
OAIChatPreprocessor
as
OAIChatPreprocessor
from
dynemo
._core
import
Backend
as
Backend
from
dynemo
._core
import
Client
as
Client
from
dynemo
._core
import
DistributedRuntime
as
DistributedRuntime
from
dynemo
._core
import
KvRouter
as
KvRouter
from
dynemo
._core
import
ModelDeploymentCard
as
ModelDeploymentCard
from
dynemo
._core
import
OAIChatPreprocessor
as
OAIChatPreprocessor
def
triton
_worker
():
def
dynemo
_worker
():
def
decorator
(
func
):
@
wraps
(
func
)
async
def
wrapper
(
*
args
,
**
kwargs
):
...
...
@@ -59,7 +59,7 @@ def triton_worker():
return
decorator
def
triton
_endpoint
(
def
dynemo
_endpoint
(
request_model
:
Union
[
Type
[
BaseModel
],
Type
[
Any
]],
response_model
:
Type
[
BaseModel
]
)
->
Callable
:
def
decorator
(
...
...
lib/bindings/python/tests/soak.py
View file @
1af7433b
...
...
@@ -19,7 +19,7 @@ import string
import
uvloop
from
triton_distributed
.runtime
import
DistributedRuntime
,
triton
_worker
from
dynemo
.runtime
import
DistributedRuntime
,
dynemo
_worker
# Soak Test
#
...
...
@@ -31,7 +31,7 @@ from triton_distributed.runtime import DistributedRuntime, triton_worker
# could still eventually be a problem.
@
triton
_worker
()
@
dynemo
_worker
()
async
def
worker
(
runtime
:
DistributedRuntime
):
ns
=
random_string
()
task
=
asyncio
.
create_task
(
server_init
(
runtime
,
ns
))
...
...
lib/bindings/python/tests/test_bindings_install.py
View file @
1af7433b
...
...
@@ -20,7 +20,7 @@ pytestmark = pytest.mark.pre_merge
def
test_bindings_install
():
# Verify python bindings to rust can be imported
import
triton_distributed
.runtime
as
tdr
import
dynemo
.runtime
as
tdr
# Placeholder to avoid unused import errors or removal by linters
assert
tdr
lib/bindings/python/tests/test_etcd_bindings.py
View file @
1af7433b
...
...
@@ -15,7 +15,7 @@
import
asyncio
from
triton_distributed
._core
import
DistributedRuntime
from
dynemo
._core
import
DistributedRuntime
async
def
test_simple_put_get
():
...
...
lib/llm/Cargo.lock
View file @
1af7433b
...
...
@@ -1387,6 +1387,109 @@ dependencies = [
"reborrow",
]
[[package]]
name = "dynemo-llm"
version = "0.2.1"
dependencies = [
"anyhow",
"async-openai",
"async-stream",
"async-trait",
"async_zmq",
"axum 0.8.1",
"bindgen 0.70.1",
"blake3",
"bs62",
"bytes",
"chrono",
"cmake",
"derive_builder",
"dynemo-runtime",
"either",
"erased-serde",
"futures",
"galil-seiferas",
"hf-hub 0.4.1",
"indexmap 2.7.1",
"insta",
"itertools 0.14.0",
"libc",
"llama-cpp-2",
"minijinja",
"minijinja-contrib",
"mistralrs",
"prometheus",
"proptest",
"pyo3",
"regex",
"reqwest",
"rstest",
"semver",
"sentencepiece",
"serde",
"serde-pickle",
"serde_json",
"serde_repr",
"strum 0.27.1",
"tempfile",
"thiserror 2.0.11",
"tokenizers",
"tokio",
"tokio-stream",
"tokio-util",
"toktrie 0.6.29",
"toktrie_hf_tokenizers 0.6.29",
"tracing",
"unicode-segmentation",
"uuid 1.14.0",
"validator",
"xxhash-rust",
]
[[package]]
name = "dynemo-runtime"
version = "0.2.1"
dependencies = [
"anyhow",
"async-nats",
"async-once-cell",
"async-stream",
"async-trait",
"async_zmq",
"blake3",
"bytes",
"chrono",
"derive-getters",
"derive_builder",
"educe",
"either",
"etcd-client",
"figment",
"futures",
"humantime",
"local-ip-address",
"log",
"nid",
"nix",
"nuid",
"once_cell",
"prometheus",
"rand",
"regex",
"serde",
"serde_json",
"socket2",
"thiserror 1.0.69",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
"tracing-subscriber",
"uuid 1.14.0",
"validator",
"xxhash-rust",
]
[[package]]
name = "ed25519"
version = "2.2.3"
...
...
@@ -5747,109 +5850,6 @@ dependencies = [
"tracing-serde",
]
[[package]]
name = "triton-distributed-llm"
version = "0.2.1"
dependencies = [
"anyhow",
"async-openai",
"async-stream",
"async-trait",
"async_zmq",
"axum 0.8.1",
"bindgen 0.70.1",
"blake3",
"bs62",
"bytes",
"chrono",
"cmake",
"derive_builder",
"either",
"erased-serde",
"futures",
"galil-seiferas",
"hf-hub 0.4.1",
"indexmap 2.7.1",
"insta",
"itertools 0.14.0",
"libc",
"llama-cpp-2",
"minijinja",
"minijinja-contrib",
"mistralrs",
"prometheus",
"proptest",
"pyo3",
"regex",
"reqwest",
"rstest",
"semver",
"sentencepiece",
"serde",
"serde-pickle",
"serde_json",
"serde_repr",
"strum 0.27.1",
"tempfile",
"thiserror 2.0.11",
"tokenizers",
"tokio",
"tokio-stream",
"tokio-util",
"toktrie 0.6.29",
"toktrie_hf_tokenizers 0.6.29",
"tracing",
"triton-distributed-runtime",
"unicode-segmentation",
"uuid 1.14.0",
"validator",
"xxhash-rust",
]
[[package]]
name = "triton-distributed-runtime"
version = "0.2.1"
dependencies = [
"anyhow",
"async-nats",
"async-once-cell",
"async-stream",
"async-trait",
"async_zmq",
"blake3",
"bytes",
"chrono",
"derive-getters",
"derive_builder",
"educe",
"either",
"etcd-client",
"figment",
"futures",
"humantime",
"local-ip-address",
"log",
"nid",
"nix",
"nuid",
"once_cell",
"prometheus",
"rand",
"regex",
"serde",
"serde_json",
"socket2",
"thiserror 1.0.69",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
"tracing-subscriber",
"uuid 1.14.0",
"validator",
"xxhash-rust",
]
[[package]]
name = "try-lock"
version = "0.2.5"
...
...
Prev
1
2
3
4
5
6
7
8
9
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment