Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
5f179186
Unverified
Commit
5f179186
authored
Jul 18, 2025
by
Tanmay Verma
Committed by
GitHub
Jul 18, 2025
Browse files
refactor: Migrate to new UX2 for python launch (#2003)
parent
fc124360
Changes
14
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
42 additions
and
62 deletions
+42
-62
components/backends/trtllm/launch/agg.sh
components/backends/trtllm/launch/agg.sh
+3
-3
components/backends/trtllm/launch/agg_router.sh
components/backends/trtllm/launch/agg_router.sh
+3
-3
components/backends/trtllm/launch/disagg.sh
components/backends/trtllm/launch/disagg.sh
+4
-4
components/backends/trtllm/launch/disagg_router.sh
components/backends/trtllm/launch/disagg_router.sh
+4
-4
components/backends/trtllm/multinode/start_frontend_services.sh
...ents/backends/trtllm/multinode/start_frontend_services.sh
+1
-1
components/backends/trtllm/multinode/start_trtllm_worker.sh
components/backends/trtllm/multinode/start_trtllm_worker.sh
+1
-1
components/backends/trtllm/src/dynamo/trtllm/__init__.py
components/backends/trtllm/src/dynamo/trtllm/__init__.py
+2
-0
components/backends/trtllm/src/dynamo/trtllm/__main__.py
components/backends/trtllm/src/dynamo/trtllm/__main__.py
+6
-0
components/backends/trtllm/src/dynamo/trtllm/main.py
components/backends/trtllm/src/dynamo/trtllm/main.py
+9
-40
components/backends/trtllm/src/dynamo/trtllm/utils/disagg_utils.py
...s/backends/trtllm/src/dynamo/trtllm/utils/disagg_utils.py
+0
-0
components/backends/trtllm/src/dynamo/trtllm/utils/request_handlers/handler_base.py
.../src/dynamo/trtllm/utils/request_handlers/handler_base.py
+4
-1
components/backends/trtllm/src/dynamo/trtllm/utils/request_handlers/handlers.py
...tllm/src/dynamo/trtllm/utils/request_handlers/handlers.py
+1
-1
components/backends/trtllm/src/dynamo/trtllm/utils/trtllm_utils.py
...s/backends/trtllm/src/dynamo/trtllm/utils/trtllm_utils.py
+3
-3
pyproject.toml
pyproject.toml
+1
-1
No files found.
components/backends/trtllm/launch/agg.sh
View file @
5f179186
...
...
@@ -19,12 +19,12 @@ trap cleanup EXIT INT TERM
# run clear_namespace
python3 utils/clear_namespace.py
--namespace
dynamo
# run
ingress
dynamo run
in
=
http
out
=
dyn
--http-port
=
8000 &
# run
frontend
python3
-m
dynamo.frontend
--http-port
8000 &
DYNAMO_PID
=
$!
# run worker
python3
components/worker.py
\
python3
-m
dynamo.trtllm
\
--model-path
"
$MODEL_PATH
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--extra-engine-args
"
$AGG_ENGINE_ARGS
"
components/backends/trtllm/launch/agg_router.sh
View file @
5f179186
...
...
@@ -19,12 +19,12 @@ trap cleanup EXIT INT TERM
# run clear_namespace
python3 utils/clear_namespace.py
--namespace
dynamo
# run
ingress
dynamo run
in
=
http
out
=
dyn
--router-mode
kv
--http-port
=
8000 &
# run
frontend
python3
-m
dynamo.frontend
--router-mode
kv
--http-port
8000 &
DYNAMO_PID
=
$!
# run worker
python3
components/worker.py
\
python3
-m
dynamo.trtllm
\
--model-path
"
$MODEL_PATH
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--extra-engine-args
"
$AGG_ENGINE_ARGS
"
\
...
...
components/backends/trtllm/launch/disagg.sh
View file @
5f179186
...
...
@@ -23,12 +23,12 @@ trap cleanup EXIT INT TERM
# run clear_namespace
python3 utils/clear_namespace.py
--namespace
dynamo
# run
ingress
dynamo run
in
=
http
out
=
dyn
--http-port
=
8000 &
# run
frontend
python3
-m
dynamo.frontend
--http-port
8000 &
DYNAMO_PID
=
$!
# run prefill worker
CUDA_VISIBLE_DEVICES
=
$PREFILL_CUDA_VISIBLE_DEVICES
python3
components/worker.py
\
CUDA_VISIBLE_DEVICES
=
$PREFILL_CUDA_VISIBLE_DEVICES
python3
-m
dynamo.trtllm
\
--model-path
"
$MODEL_PATH
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--extra-engine-args
"
$PREFILL_ENGINE_ARGS
"
\
...
...
@@ -37,7 +37,7 @@ CUDA_VISIBLE_DEVICES=$PREFILL_CUDA_VISIBLE_DEVICES python3 components/worker.py
PREFILL_PID
=
$!
# run decode worker
CUDA_VISIBLE_DEVICES
=
$DECODE_CUDA_VISIBLE_DEVICES
python3
components/worker.py
\
CUDA_VISIBLE_DEVICES
=
$DECODE_CUDA_VISIBLE_DEVICES
python3
-m
dynamo.trtllm
\
--model-path
"
$MODEL_PATH
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--extra-engine-args
"
$DECODE_ENGINE_ARGS
"
\
...
...
components/backends/trtllm/launch/disagg_router.sh
View file @
5f179186
...
...
@@ -23,8 +23,8 @@ trap cleanup EXIT INT TERM
# run clear_namespace
python3 utils/clear_namespace.py
--namespace
dynamo
# run
ingress
dynamo run
in
=
http
out
=
dyn
--router-mode
kv
--http-port
=
8000 &
# run
frontend
python3
-m
dynamo.frontend
--router-mode
kv
--http-port
8000 &
DYNAMO_PID
=
$!
...
...
@@ -37,7 +37,7 @@ else
fi
# run prefill worker
CUDA_VISIBLE_DEVICES
=
$PREFILL_CUDA_VISIBLE_DEVICES
python3
components/worker.py
\
CUDA_VISIBLE_DEVICES
=
$PREFILL_CUDA_VISIBLE_DEVICES
python3
-m
dynamo.trtllm
\
--model-path
"
$MODEL_PATH
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--extra-engine-args
"
$PREFILL_ENGINE_ARGS
"
\
...
...
@@ -47,7 +47,7 @@ CUDA_VISIBLE_DEVICES=$PREFILL_CUDA_VISIBLE_DEVICES python3 components/worker.py
PREFILL_PID
=
$!
# run decode worker
CUDA_VISIBLE_DEVICES
=
$DECODE_CUDA_VISIBLE_DEVICES
python3
components/worker.py
\
CUDA_VISIBLE_DEVICES
=
$DECODE_CUDA_VISIBLE_DEVICES
python3
-m
dynamo.trtllm
\
--model-path
"
$MODEL_PATH
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--extra-engine-args
"
$DECODE_ENGINE_ARGS
"
\
...
...
components/backends/trtllm/multinode/start_frontend_services.sh
View file @
5f179186
...
...
@@ -13,4 +13,4 @@ sleep 3
# Start OpenAI Frontend which will dynamically discover workers when they startup
# NOTE: This is a blocking call.
dynamo-run
in
=
http
out
=
dyn
--http-port
8000
python3
-m
dynamo.frontend
--http-port
8000
components/backends/trtllm/multinode/start_trtllm_worker.sh
View file @
5f179186
...
...
@@ -39,7 +39,7 @@ if [[ -n ${DISAGGREGATION_STRATEGY} ]]; then
fi
trtllm-llmapi-launch
\
python3
/mnt/components/worker.py
\
python3
-m
dynamo.trtllm
\
--model-path
"
${
MODEL_PATH
}
"
\
--served-model-name
"
${
SERVED_MODEL_NAME
}
"
\
--extra-engine-args
"
${
ENGINE_CONFIG
}
"
\
...
...
components/backends/trtllm/src/dynamo/trtllm/__init__.py
0 → 100644
View file @
5f179186
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
components/backends/trtllm/src/dynamo/trtllm/__main__.py
0 → 100644
View file @
5f179186
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from
dynamo.trtllm.main
import
main
if
__name__
==
"__main__"
:
main
()
components/backends/trtllm/
components/worker
.py
→
components/backends/trtllm/
src/dynamo/trtllm/main
.py
View file @
5f179186
...
...
@@ -3,10 +3,8 @@
import
asyncio
import
logging
import
os
import
signal
import
sys
from
typing
import
TYPE_CHECKING
import
uvloop
from
tensorrt_llm
import
SamplingParams
...
...
@@ -21,48 +19,16 @@ from dynamo.llm import (
)
from
dynamo.runtime
import
DistributedRuntime
,
dynamo_worker
from
dynamo.runtime.logging
import
configure_dynamo_logging
if
TYPE_CHECKING
:
from
utils.trtllm_utils
import
Config
def
_setup_path_and_imports
():
"""Setup path and import utils modules"""
# Add the parent directory to the Python path so we can import utils
parent_dir
=
os
.
path
.
abspath
(
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
".."
))
if
parent_dir
not
in
sys
.
path
:
sys
.
path
.
insert
(
0
,
parent_dir
)
from
utils.request_handlers.handlers
import
(
RequestHandlerConfig
,
RequestHandlerFactory
,
)
from
utils.trtllm_utils
import
(
Config
,
cmd_line_args
,
is_first_worker
,
parse_endpoint
,
)
return
(
RequestHandlerConfig
,
RequestHandlerFactory
,
Config
,
cmd_line_args
,
is_first_worker
,
parse_endpoint
,
)
# Import utils modules
(
from
dynamo.trtllm.utils.request_handlers.handlers
import
(
RequestHandlerConfig
,
RequestHandlerFactory
,
)
from
dynamo.trtllm.utils.trtllm_utils
import
(
Config
,
cmd_line_args
,
is_first_worker
,
parse_endpoint
,
)
=
_setup_path_and_imports
()
)
# Default buffer size for kv cache events.
DEFAULT_KV_EVENT_BUFFER_MAX_SIZE
=
1024
...
...
@@ -205,6 +171,9 @@ async def init(runtime: DistributedRuntime, config: Config):
await
endpoint
.
serve_endpoint
(
handler
.
generate
)
def
main
():
uvloop
.
run
(
worker
())
if
__name__
==
"__main__"
:
uvloop
.
install
()
asyncio
.
run
(
worker
())
main
()
components/backends/trtllm/utils/disagg_utils.py
→
components/backends/trtllm/
src/dynamo/trtllm/
utils/disagg_utils.py
View file @
5f179186
File moved
components/backends/trtllm/utils/request_handlers/handler_base.py
→
components/backends/trtllm/
src/dynamo/trtllm/
utils/request_handlers/handler_base.py
View file @
5f179186
...
...
@@ -19,11 +19,14 @@ from enum import Enum
from
tensorrt_llm
import
SamplingParams
from
tensorrt_llm.llmapi
import
DisaggregatedParams
as
LlmDisaggregatedParams
from
utils.disagg_utils
import
DisaggregatedParams
,
DisaggregatedParamsCodec
from
dynamo.llm.tensorrtllm.engine
import
TensorRTLLMEngine
from
dynamo.llm.tensorrtllm.publisher
import
Publisher
from
dynamo.runtime.logging
import
configure_dynamo_logging
from
dynamo.trtllm.utils.disagg_utils
import
(
DisaggregatedParams
,
DisaggregatedParamsCodec
,
)
configure_dynamo_logging
()
...
...
components/backends/trtllm/utils/request_handlers/handlers.py
→
components/backends/trtllm/
src/dynamo/trtllm/
utils/request_handlers/handlers.py
View file @
5f179186
...
...
@@ -3,7 +3,7 @@
import
copy
from
utils.request_handlers.handler_base
import
(
from
dynamo.trtllm.
utils.request_handlers.handler_base
import
(
DisaggregationMode
,
DisaggregationStrategy
,
HandlerBase
,
...
...
components/backends/trtllm/utils/trtllm_utils.py
→
components/backends/trtllm/
src/dynamo/trtllm/
utils/trtllm_utils.py
View file @
5f179186
...
...
@@ -4,14 +4,14 @@
import
argparse
from
typing
import
Optional
from
utils.request_handlers.handler_base
import
(
from
dynamo.trtllm.
utils.request_handlers.handler_base
import
(
DisaggregationMode
,
DisaggregationStrategy
,
)
# Default endpoint for the next worker.
DEFAULT_ENDPOINT
=
"dyn://dynamo.tensorrt_llm.generate"
DEFAULT_MODEL_PATH
=
"TinyLlama
-1.1B-Instruct
"
DEFAULT_MODEL_PATH
=
"TinyLlama
/TinyLlama-1.1B-Chat-v1.0
"
DEFAULT_NEXT_ENDPOINT
=
"dyn://dynamo.tensorrt_llm_next.generate"
DEFAULT_DISAGGREGATION_STRATEGY
=
DisaggregationStrategy
.
DECODE_FIRST
DEFAULT_DISAGGREGATION_MODE
=
DisaggregationMode
.
AGGREGATED
...
...
@@ -123,7 +123,7 @@ def cmd_line_args():
parser
.
add_argument
(
"--publish-events-and-metrics"
,
action
=
"store_true"
,
help
=
"
P
ublish events and metrics to the dynamo components.
Note: This is not supported when running in prefill disaggregation mode.
"
,
help
=
"
If set, p
ublish events and metrics to the dynamo components."
,
)
parser
.
add_argument
(
"--disaggregation-mode"
,
...
...
pyproject.toml
View file @
5f179186
...
...
@@ -79,7 +79,7 @@ requires = ["hatchling"]
build-backend
=
"hatchling.build"
[tool.hatch.build.targets.wheel]
packages
=
[
"deploy/sdk/src/dynamo"
,
"components/planner/src/dynamo"
,
"components/frontend/src/dynamo"
,
"components/backends/llama_cpp/src/dynamo"
,
"components/backends/mocker/src/dynamo"
]
packages
=
[
"deploy/sdk/src/dynamo"
,
"components/planner/src/dynamo"
,
"components/frontend/src/dynamo"
,
"components/backends/llama_cpp/src/dynamo"
,
"components/backends/mocker/src/dynamo"
,
"components/backends/trtllm/src/dynamo"
]
# This section is for including the binaries in the wheel package
# but doesn't make them executable scripts in the venv bin directory
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment