Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
dd31a322
Commit
dd31a322
authored
Mar 09, 2025
by
Neelay Shah
Committed by
GitHub
Mar 09, 2025
Browse files
chore: stragglers rename (#69)
Co-authored-by:
Harrison King Saturley-Hall
<
hsaturleyhal@nvidia.com
>
parent
efe82b86
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
9 additions
and
10 deletions
+9
-10
examples/python_rs/llm/vllm/scripts/disaggregated.sh
examples/python_rs/llm/vllm/scripts/disaggregated.sh
+2
-2
examples/python_rs/llm/vllm_nixl/router/prefill_worker.py
examples/python_rs/llm/vllm_nixl/router/prefill_worker.py
+1
-1
examples/python_rs/llm/vllm_nixl/router/worker.py
examples/python_rs/llm/vllm_nixl/router/worker.py
+1
-1
lib/bindings/cpp/nvllm-trt/include/nvidia/nvllm/nvllm_trt.h
lib/bindings/cpp/nvllm-trt/include/nvidia/nvllm/nvllm_trt.h
+1
-1
lib/bindings/python/uv.lock
lib/bindings/python/uv.lock
+1
-1
lib/runtime/src/component/service.rs
lib/runtime/src/component/service.rs
+1
-2
lib/runtime/src/lib.rs
lib/runtime/src/lib.rs
+2
-2
No files found.
examples/python_rs/llm/vllm/scripts/disaggregated.sh
View file @
dd31a322
...
...
@@ -99,7 +99,7 @@ PREFILL_CMD="VLLM_WORKER_MULTIPROC_METHOD=spawn CUDA_VISIBLE_DEVICES=0 \
--max-model-len 1000
\
--tensor-parallel-size 1
\
--kv-transfer-config
\
'{
\"
kv_connector
\"
:
\"
Triton
NcclConnector
\"
,
\"
kv_role
\"
:
\"
kv_producer
\"
,
\"
kv_rank
\"
:0,
\"
kv_parallel_size
\"
:2}'"
'{
\"
kv_connector
\"
:
\"
Dynamo
NcclConnector
\"
,
\"
kv_role
\"
:
\"
kv_producer
\"
,
\"
kv_rank
\"
:0,
\"
kv_parallel_size
\"
:2}'"
tmux
select
-pane
-t
2
tmux send-keys
"
$INIT_CMD
&&
$PREFILL_CMD
"
C-m
...
...
@@ -115,7 +115,7 @@ DECODE_CMD="VLLM_WORKER_MULTIPROC_METHOD=spawn CUDA_VISIBLE_DEVICES=1 \
--max-model-len 1000
\
--tensor-parallel-size 1
\
--kv-transfer-config
\
'{
\"
kv_connector
\"
:
\"
Triton
NcclConnector
\"
,
\"
kv_role
\"
:
\"
kv_consumer
\"
,
\"
kv_rank
\"
:1,
\"
kv_parallel_size
\"
:2}'"
'{
\"
kv_connector
\"
:
\"
Dynamo
NcclConnector
\"
,
\"
kv_role
\"
:
\"
kv_consumer
\"
,
\"
kv_rank
\"
:1,
\"
kv_parallel_size
\"
:2}'"
tmux
select
-pane
-t
3
tmux send-keys
"
$INIT_CMD
&&
$DECODE_CMD
"
C-m
...
...
examples/python_rs/llm/vllm_nixl/router/prefill_worker.py
View file @
dd31a322
...
...
@@ -94,7 +94,7 @@ async def worker(runtime: DistributedRuntime, engine_args: AsyncEngineArgs):
request_handler
=
RequestHandler
(
engine_client
,
metadata_store
)
# TODO: integrate prefill_queue to a
n triton_distributed
endpoint
# TODO: integrate prefill_queue to a
dynamo
endpoint
async
with
PrefillQueue
.
get_instance
(
nats_server
=
prefill_queue_nats_server
,
stream_name
=
prefill_queue_stream_name
,
...
...
examples/python_rs/llm/vllm_nixl/router/worker.py
View file @
dd31a322
...
...
@@ -70,7 +70,7 @@ class RequestHandler:
print
(
"RequestHandler initialized"
)
def
get_remote_prefill_request_callback
(
self
):
# TODO: integrate prefill_queue to
an triton_distributed
endpoint
# TODO: integrate prefill_queue to
dynamo
endpoint
async
def
callback
(
request
:
RemotePrefillRequest
):
async
with
PrefillQueue
.
get_instance
(
nats_server
=
self
.
_prefill_queue_nats_server
,
...
...
lib/bindings/cpp/nvllm-trt/include/nvidia/nvllm/nvllm_trt.h
View file @
dd31a322
...
...
@@ -33,7 +33,7 @@ nvllm_trt_engine_t nvllm_trt_engine_create(const char* config_proto);
// Create a nvLLM TRT Engine from an instance of the engine
// This requires the raw engine pointer to be an instantiated object at the exact same
// commit version as the version of TRTLLM used to build the nvLLM C API.
// This is a workaround to enable the
Triton
TensorRT LLM backend to use nvLLM.
// This is a workaround to enable the
Dynamo
TensorRT LLM backend to use nvLLM.
nvllm_trt_engine_t
nvllm_trt_engine_unsafe_create_from_executor
(
void
*
engine
);
// Source: Enqueue a streaming request via a json message to the request queue
...
...
lib/bindings/python/uv.lock
View file @
dd31a322
...
...
@@ -100,7 +100,7 @@ wheels = [
]
[[package]]
name = "
triton-distributed-rs
"
name = "
dynamo
"
version = "0.1.3"
source = { editable = "." }
dependencies = [
...
...
lib/runtime/src/component/service.rs
View file @
dd31a322
...
...
@@ -27,8 +27,7 @@ pub type StatsHandler =
pub
type
EndpointStatsHandler
=
Box
<
dyn
FnMut
(
endpoint
::
Stats
)
->
serde_json
::
Value
+
Send
+
Sync
+
'static
>
;
// TODO(rename) - pending rename of project
pub
const
PROJECT_NAME
:
&
str
=
"Triton"
;
pub
const
PROJECT_NAME
:
&
str
=
"Dynamo"
;
#[derive(Educe,
Builder,
Dissolve)]
#[educe(Debug)]
...
...
lib/runtime/src/lib.rs
View file @
dd31a322
...
...
@@ -13,7 +13,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//!
Triton
//!
Dynamo
#![allow(dead_code)]
#![allow(unused_imports)]
...
...
@@ -49,7 +49,7 @@ pub use futures::stream;
pub
use
tokio_util
::
sync
::
CancellationToken
;
pub
use
worker
::
Worker
;
/// Types of Tokio runtimes that can be used to construct a
Triton
[Runtime].
/// Types of Tokio runtimes that can be used to construct a
Dynamo
[Runtime].
#[derive(Clone)]
enum
RuntimeType
{
Shared
(
Arc
<
tokio
::
runtime
::
Runtime
>
),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment