Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
626d7e18
"docs/vscode:/vscode.git/clone" did not exist on "729b5fd5355ebe6ca36bdfda08bb31e51d744d18"
Unverified
Commit
626d7e18
authored
Aug 20, 2025
by
Michael Feil
Committed by
GitHub
Aug 20, 2025
Browse files
feat(request cancellation): pycontext, propagating the `is_stopped` into python land. (#2158)
parent
49958435
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
113 additions
and
10 deletions
+113
-10
Cargo.lock
Cargo.lock
+3
-3
examples/runtime/hello_world/hello_world.py
examples/runtime/hello_world/hello_world.py
+6
-3
lib/bindings/python/rust/context.rs
lib/bindings/python/rust/context.rs
+76
-0
lib/bindings/python/rust/engine.rs
lib/bindings/python/rust/engine.rs
+25
-4
lib/bindings/python/rust/lib.rs
lib/bindings/python/rust/lib.rs
+2
-0
lib/bindings/python/src/dynamo/runtime/__init__.py
lib/bindings/python/src/dynamo/runtime/__init__.py
+1
-0
No files found.
Cargo.lock
View file @
626d7e18
...
@@ -1446,9 +1446,9 @@ dependencies = [
...
@@ -1446,9 +1446,9 @@ dependencies = [
[[package]]
[[package]]
name = "cudarc"
name = "cudarc"
version = "0.17.
1
"
version = "0.17.
2
"
source = "registry+https://github.com/rust-lang/crates.io-index"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "
018e09f92e57618dbae5a3a0dcc2026547eed0e5b6a503a32c11ee1
a9
4
89
0830
"
checksum = "
8147ca46109d41cc513fd629b52bbea9bd09b972034c2f32954ce84
a9
2
89
5a91
"
dependencies = [
dependencies = [
"libloading",
"libloading",
]
]
...
@@ -1924,7 +1924,7 @@ dependencies = [
...
@@ -1924,7 +1924,7 @@ dependencies = [
"candle-core 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
"candle-core 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
"chrono",
"chrono",
"criterion",
"criterion",
"cudarc 0.17.
1
",
"cudarc 0.17.
2
",
"dashmap",
"dashmap",
"derive-getters",
"derive-getters",
"derive_builder",
"derive_builder",
...
...
examples/runtime/hello_world/hello_world.py
View file @
626d7e18
...
@@ -18,7 +18,7 @@ import logging
...
@@ -18,7 +18,7 @@ import logging
import
uvloop
import
uvloop
from
dynamo.runtime
import
DistributedRuntime
,
dynamo_endpoint
,
dynamo_worker
from
dynamo.runtime
import
DistributedRuntime
,
PyContext
,
dynamo_endpoint
,
dynamo_worker
from
dynamo.runtime.logging
import
configure_dynamo_logging
from
dynamo.runtime.logging
import
configure_dynamo_logging
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
...
@@ -26,10 +26,13 @@ configure_dynamo_logging(service_name="backend")
...
@@ -26,10 +26,13 @@ configure_dynamo_logging(service_name="backend")
@
dynamo_endpoint
(
str
,
str
)
@
dynamo_endpoint
(
str
,
str
)
async
def
content_generator
(
request
:
str
):
async
def
content_generator
(
request
:
str
,
context
:
PyContext
):
logger
.
info
(
f
"Received request:
{
request
}
"
)
logger
.
info
(
f
"Received request:
{
request
}
with `id=
{
context
.
id
()
}
`
"
)
for
word
in
request
.
split
(
","
):
for
word
in
request
.
split
(
","
):
await
asyncio
.
sleep
(
1
)
await
asyncio
.
sleep
(
1
)
if
context
.
is_stopped
()
or
context
.
is_killed
():
print
(
"request got cancelled."
)
return
yield
f
"Hello
{
word
}
!"
yield
f
"Hello
{
word
}
!"
...
...
lib/bindings/python/rust/context.rs
0 → 100644
View file @
626d7e18
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
// PyContext is a wrapper around the AsyncEngineContext to allow for Python bindings.
pub
use
dynamo_runtime
::
pipeline
::
AsyncEngineContext
;
use
pyo3
::
prelude
::
*
;
use
std
::
sync
::
Arc
;
// PyContext is a wrapper around the AsyncEngineContext to allow for Python bindings.
// Not all methods of the AsyncEngineContext are exposed, just the primary ones for tracing + cancellation.
// Kept as class, to allow for future expansion if needed.
#[pyclass]
pub struct PyContext {
    // Shared handle to the engine context that this Python-visible object wraps.
    pub inner: Arc<dyn AsyncEngineContext>,
}
impl
PyContext
{
pub
fn
new
(
inner
:
Arc
<
dyn
AsyncEngineContext
>
)
->
Self
{
Self
{
inner
}
}
}
#[pymethods]
impl
PyContext
{
// sync method of `await async_is_stopped()`
fn
is_stopped
(
&
self
)
->
bool
{
self
.inner
.is_stopped
()
}
// sync method of `await async_is_killed()`
fn
is_killed
(
&
self
)
->
bool
{
self
.inner
.is_killed
()
}
// issues a stop generating
fn
stop_generating
(
&
self
)
{
self
.inner
.stop_generating
();
}
fn
id
(
&
self
)
->
&
str
{
self
.inner
.id
()
}
// allows building a async callback.
fn
async_killed_or_stopped
<
'a
>
(
&
self
,
py
:
Python
<
'a
>
)
->
PyResult
<
Bound
<
'a
,
PyAny
>>
{
let
inner
=
self
.inner
.clone
();
pyo3_async_runtimes
::
tokio
::
future_into_py
(
py
,
async
move
{
tokio
::
select!
{
_
=
inner
.killed
()
=>
{
Ok
(
true
)
}
_
=
inner
.stopped
()
=>
{
Ok
(
true
)
}
}
})
}
}
/// Reports whether `callable` declares a parameter named `target_name`.
///
/// PyO3 equivalent of the following Python:
///
/// ```python
/// def callable_accepts_kwarg(callable, target_name: str) -> bool:
///     import inspect
///     return target_name in inspect.signature(callable).parameters
/// ```
///
/// Only explicitly named parameters are matched: the check is a membership test on
/// the parameter names of the signature, so a catch-all `**kwargs` parameter does
/// not count as accepting `target_name`.
///
/// # Errors
///
/// Returns the Python exception as a `PyErr` if importing `inspect`, calling
/// `inspect.signature(callable)`, reading `.parameters`, or the membership test fails.
pub fn callable_accepts_kwarg(
    py: Python<'_>,
    callable: &Bound<'_, PyAny>,
    target_name: &str,
) -> PyResult<bool> {
    let parameters = py
        .import("inspect")?
        .call_method1("signature", (callable,))?
        .getattr("parameters")?;
    // `contains` is PyO3's spelling of Python's `target_name in parameters`; it
    // replaces invoking `__contains__` by name and extracting the bool by hand.
    parameters.contains(target_name)
}
lib/bindings/python/rust/engine.rs
View file @
626d7e18
...
@@ -13,11 +13,13 @@
...
@@ -13,11 +13,13 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
use
std
::
sync
::
Arc
;
use
super
::
context
::{
callable_accepts_kwarg
,
PyContext
};
use
pyo3
::
prelude
::
*
;
use
pyo3
::
prelude
::
*
;
use
pyo3
::
types
::{
PyDict
,
PyModule
};
use
pyo3
::{
PyAny
,
PyErr
};
use
pyo3_async_runtimes
::
TaskLocals
;
use
pyo3_async_runtimes
::
TaskLocals
;
use
pythonize
::{
depythonize
,
pythonize
};
use
pythonize
::{
depythonize
,
pythonize
};
use
std
::
sync
::
Arc
;
use
tokio
::
sync
::
mpsc
;
use
tokio
::
sync
::
mpsc
;
use
tokio_stream
::{
wrappers
::
ReceiverStream
,
StreamExt
};
use
tokio_stream
::{
wrappers
::
ReceiverStream
,
StreamExt
};
...
@@ -36,7 +38,6 @@ pub fn add_to_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
...
@@ -36,7 +38,6 @@ pub fn add_to_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
m
.add_class
::
<
PythonAsyncEngine
>
()
?
;
m
.add_class
::
<
PythonAsyncEngine
>
()
?
;
Ok
(())
Ok
(())
}
}
// todos:
// todos:
// - [ ] enable context cancellation
// - [ ] enable context cancellation
// - this will likely require a change to the function signature python calling arguments
// - this will likely require a change to the function signature python calling arguments
...
@@ -113,6 +114,7 @@ pub struct PythonServerStreamingEngine {
...
@@ -113,6 +114,7 @@ pub struct PythonServerStreamingEngine {
_
cancel_token
:
CancellationToken
,
_
cancel_token
:
CancellationToken
,
generator
:
Arc
<
PyObject
>
,
generator
:
Arc
<
PyObject
>
,
event_loop
:
Arc
<
PyObject
>
,
event_loop
:
Arc
<
PyObject
>
,
has_pycontext
:
bool
,
}
}
impl
PythonServerStreamingEngine
{
impl
PythonServerStreamingEngine
{
...
@@ -121,10 +123,16 @@ impl PythonServerStreamingEngine {
...
@@ -121,10 +123,16 @@ impl PythonServerStreamingEngine {
generator
:
Arc
<
PyObject
>
,
generator
:
Arc
<
PyObject
>
,
event_loop
:
Arc
<
PyObject
>
,
event_loop
:
Arc
<
PyObject
>
,
)
->
Self
{
)
->
Self
{
let
has_pycontext
=
Python
::
with_gil
(|
py
|
{
let
callable
=
generator
.bind
(
py
);
callable_accepts_kwarg
(
py
,
callable
,
"context"
)
.unwrap_or
(
false
)
});
PythonServerStreamingEngine
{
PythonServerStreamingEngine
{
_
cancel_token
:
cancel_token
,
_
cancel_token
:
cancel_token
,
generator
,
generator
,
event_loop
,
event_loop
,
has_pycontext
,
}
}
}
}
}
}
...
@@ -166,6 +174,8 @@ where
...
@@ -166,6 +174,8 @@ where
let
generator
=
self
.generator
.clone
();
let
generator
=
self
.generator
.clone
();
let
event_loop
=
self
.event_loop
.clone
();
let
event_loop
=
self
.event_loop
.clone
();
let
ctx_python
=
ctx
.clone
();
let
has_pycontext
=
self
.has_pycontext
;
// Acquiring the GIL is similar to acquiring a standard lock/mutex
// Acquiring the GIL is similar to acquiring a standard lock/mutex
// Performing this in an tokio async task could block the thread for an undefined amount of time
// Performing this in an tokio async task could block the thread for an undefined amount of time
...
@@ -180,7 +190,18 @@ where
...
@@ -180,7 +190,18 @@ where
let
stream
=
tokio
::
task
::
spawn_blocking
(
move
||
{
let
stream
=
tokio
::
task
::
spawn_blocking
(
move
||
{
Python
::
with_gil
(|
py
|
{
Python
::
with_gil
(|
py
|
{
let
py_request
=
pythonize
(
py
,
&
request
)
?
;
let
py_request
=
pythonize
(
py
,
&
request
)
?
;
let
gen
=
generator
.call1
(
py
,
(
py_request
,))
?
;
let
py_ctx
=
Py
::
new
(
py
,
PyContext
::
new
(
ctx_python
.clone
()))
?
;
let
gen
=
if
has_pycontext
{
// Pass context as a kwarg
let
kwarg
=
PyDict
::
new
(
py
);
kwarg
.set_item
(
"context"
,
&
py_ctx
)
?
;
generator
.call
(
py
,
(
py_request
,),
Some
(
&
kwarg
))
}
else
{
// Legacy: No `context` arg
generator
.call1
(
py
,
(
py_request
,))
}
?
;
let
locals
=
TaskLocals
::
new
(
event_loop
.bind
(
py
)
.clone
());
let
locals
=
TaskLocals
::
new
(
event_loop
.bind
(
py
)
.clone
());
pyo3_async_runtimes
::
tokio
::
into_stream_with_locals_v1
(
locals
,
gen
.into_bound
(
py
))
pyo3_async_runtimes
::
tokio
::
into_stream_with_locals_v1
(
locals
,
gen
.into_bound
(
py
))
})
})
...
...
lib/bindings/python/rust/lib.rs
View file @
626d7e18
...
@@ -45,6 +45,7 @@ impl From<RouterMode> for RsRouterMode {
...
@@ -45,6 +45,7 @@ impl From<RouterMode> for RsRouterMode {
}
}
}
}
mod
context
;
mod
engine
;
mod
engine
;
mod
http
;
mod
http
;
mod
llm
;
mod
llm
;
...
@@ -103,6 +104,7 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> {
...
@@ -103,6 +104,7 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> {
m
.add_class
::
<
http
::
HttpService
>
()
?
;
m
.add_class
::
<
http
::
HttpService
>
()
?
;
m
.add_class
::
<
http
::
HttpError
>
()
?
;
m
.add_class
::
<
http
::
HttpError
>
()
?
;
m
.add_class
::
<
http
::
HttpAsyncEngine
>
()
?
;
m
.add_class
::
<
http
::
HttpAsyncEngine
>
()
?
;
m
.add_class
::
<
context
::
PyContext
>
()
?
;
m
.add_class
::
<
EtcdKvCache
>
()
?
;
m
.add_class
::
<
EtcdKvCache
>
()
?
;
m
.add_class
::
<
ModelType
>
()
?
;
m
.add_class
::
<
ModelType
>
()
?
;
m
.add_class
::
<
llm
::
kv
::
ForwardPassMetrics
>
()
?
;
m
.add_class
::
<
llm
::
kv
::
ForwardPassMetrics
>
()
?
;
...
...
lib/bindings/python/src/dynamo/runtime/__init__.py
View file @
626d7e18
...
@@ -30,6 +30,7 @@ from dynamo._core import Endpoint as Endpoint
...
@@ -30,6 +30,7 @@ from dynamo._core import Endpoint as Endpoint
from
dynamo._core
import
EtcdKvCache
as
EtcdKvCache
from
dynamo._core
import
EtcdKvCache
as
EtcdKvCache
from
dynamo._core
import
ModelDeploymentCard
as
ModelDeploymentCard
from
dynamo._core
import
ModelDeploymentCard
as
ModelDeploymentCard
from
dynamo._core
import
OAIChatPreprocessor
as
OAIChatPreprocessor
from
dynamo._core
import
OAIChatPreprocessor
as
OAIChatPreprocessor
from
dynamo._core
import
PyContext
as
PyContext
def
dynamo_worker
(
static
=
False
):
def
dynamo_worker
(
static
=
False
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment