Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
5f179186
Unverified
Commit
5f179186
authored
Jul 18, 2025
by
Tanmay Verma
Committed by
GitHub
Jul 18, 2025
Browse files
refactor: Migrate to new UX2 for python launch (#2003)
parent
fc124360
Changes
14
Show whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
42 additions
and
62 deletions
+42
-62
components/backends/trtllm/launch/agg.sh
components/backends/trtllm/launch/agg.sh
+3
-3
components/backends/trtllm/launch/agg_router.sh
components/backends/trtllm/launch/agg_router.sh
+3
-3
components/backends/trtllm/launch/disagg.sh
components/backends/trtllm/launch/disagg.sh
+4
-4
components/backends/trtllm/launch/disagg_router.sh
components/backends/trtllm/launch/disagg_router.sh
+4
-4
components/backends/trtllm/multinode/start_frontend_services.sh
...ents/backends/trtllm/multinode/start_frontend_services.sh
+1
-1
components/backends/trtllm/multinode/start_trtllm_worker.sh
components/backends/trtllm/multinode/start_trtllm_worker.sh
+1
-1
components/backends/trtllm/src/dynamo/trtllm/__init__.py
components/backends/trtllm/src/dynamo/trtllm/__init__.py
+2
-0
components/backends/trtllm/src/dynamo/trtllm/__main__.py
components/backends/trtllm/src/dynamo/trtllm/__main__.py
+6
-0
components/backends/trtllm/src/dynamo/trtllm/main.py
components/backends/trtllm/src/dynamo/trtllm/main.py
+9
-40
components/backends/trtllm/src/dynamo/trtllm/utils/disagg_utils.py
...s/backends/trtllm/src/dynamo/trtllm/utils/disagg_utils.py
+0
-0
components/backends/trtllm/src/dynamo/trtllm/utils/request_handlers/handler_base.py
.../src/dynamo/trtllm/utils/request_handlers/handler_base.py
+4
-1
components/backends/trtllm/src/dynamo/trtllm/utils/request_handlers/handlers.py
...tllm/src/dynamo/trtllm/utils/request_handlers/handlers.py
+1
-1
components/backends/trtllm/src/dynamo/trtllm/utils/trtllm_utils.py
...s/backends/trtllm/src/dynamo/trtllm/utils/trtllm_utils.py
+3
-3
pyproject.toml
pyproject.toml
+1
-1
No files found.
components/backends/trtllm/launch/agg.sh
View file @
5f179186
...
@@ -19,12 +19,12 @@ trap cleanup EXIT INT TERM
...
@@ -19,12 +19,12 @@ trap cleanup EXIT INT TERM
# run clear_namespace
# run clear_namespace
python3 utils/clear_namespace.py
--namespace
dynamo
python3 utils/clear_namespace.py
--namespace
dynamo
# run
ingress
# run
frontend
dynamo run
in
=
http
out
=
dyn
--http-port
=
8000 &
python3
-m
dynamo.frontend
--http-port
8000 &
DYNAMO_PID
=
$!
DYNAMO_PID
=
$!
# run worker
# run worker
python3
components/worker.py
\
python3
-m
dynamo.trtllm
\
--model-path
"
$MODEL_PATH
"
\
--model-path
"
$MODEL_PATH
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--extra-engine-args
"
$AGG_ENGINE_ARGS
"
--extra-engine-args
"
$AGG_ENGINE_ARGS
"
components/backends/trtllm/launch/agg_router.sh
View file @
5f179186
...
@@ -19,12 +19,12 @@ trap cleanup EXIT INT TERM
...
@@ -19,12 +19,12 @@ trap cleanup EXIT INT TERM
# run clear_namespace
# run clear_namespace
python3 utils/clear_namespace.py
--namespace
dynamo
python3 utils/clear_namespace.py
--namespace
dynamo
# run
ingress
# run
frontend
dynamo run
in
=
http
out
=
dyn
--router-mode
kv
--http-port
=
8000 &
python3
-m
dynamo.frontend
--router-mode
kv
--http-port
8000 &
DYNAMO_PID
=
$!
DYNAMO_PID
=
$!
# run worker
# run worker
python3
components/worker.py
\
python3
-m
dynamo.trtllm
\
--model-path
"
$MODEL_PATH
"
\
--model-path
"
$MODEL_PATH
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--extra-engine-args
"
$AGG_ENGINE_ARGS
"
\
--extra-engine-args
"
$AGG_ENGINE_ARGS
"
\
...
...
components/backends/trtllm/launch/disagg.sh
View file @
5f179186
...
@@ -23,12 +23,12 @@ trap cleanup EXIT INT TERM
...
@@ -23,12 +23,12 @@ trap cleanup EXIT INT TERM
# run clear_namespace
# run clear_namespace
python3 utils/clear_namespace.py
--namespace
dynamo
python3 utils/clear_namespace.py
--namespace
dynamo
# run
ingress
# run
frontend
dynamo run
in
=
http
out
=
dyn
--http-port
=
8000 &
python3
-m
dynamo.frontend
--http-port
8000 &
DYNAMO_PID
=
$!
DYNAMO_PID
=
$!
# run prefill worker
# run prefill worker
CUDA_VISIBLE_DEVICES
=
$PREFILL_CUDA_VISIBLE_DEVICES
python3
components/worker.py
\
CUDA_VISIBLE_DEVICES
=
$PREFILL_CUDA_VISIBLE_DEVICES
python3
-m
dynamo.trtllm
\
--model-path
"
$MODEL_PATH
"
\
--model-path
"
$MODEL_PATH
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--extra-engine-args
"
$PREFILL_ENGINE_ARGS
"
\
--extra-engine-args
"
$PREFILL_ENGINE_ARGS
"
\
...
@@ -37,7 +37,7 @@ CUDA_VISIBLE_DEVICES=$PREFILL_CUDA_VISIBLE_DEVICES python3 components/worker.py
...
@@ -37,7 +37,7 @@ CUDA_VISIBLE_DEVICES=$PREFILL_CUDA_VISIBLE_DEVICES python3 components/worker.py
PREFILL_PID
=
$!
PREFILL_PID
=
$!
# run decode worker
# run decode worker
CUDA_VISIBLE_DEVICES
=
$DECODE_CUDA_VISIBLE_DEVICES
python3
components/worker.py
\
CUDA_VISIBLE_DEVICES
=
$DECODE_CUDA_VISIBLE_DEVICES
python3
-m
dynamo.trtllm
\
--model-path
"
$MODEL_PATH
"
\
--model-path
"
$MODEL_PATH
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--extra-engine-args
"
$DECODE_ENGINE_ARGS
"
\
--extra-engine-args
"
$DECODE_ENGINE_ARGS
"
\
...
...
components/backends/trtllm/launch/disagg_router.sh
View file @
5f179186
...
@@ -23,8 +23,8 @@ trap cleanup EXIT INT TERM
...
@@ -23,8 +23,8 @@ trap cleanup EXIT INT TERM
# run clear_namespace
# run clear_namespace
python3 utils/clear_namespace.py
--namespace
dynamo
python3 utils/clear_namespace.py
--namespace
dynamo
# run
ingress
# run
frontend
dynamo run
in
=
http
out
=
dyn
--router-mode
kv
--http-port
=
8000 &
python3
-m
dynamo.frontend
--router-mode
kv
--http-port
8000 &
DYNAMO_PID
=
$!
DYNAMO_PID
=
$!
...
@@ -37,7 +37,7 @@ else
...
@@ -37,7 +37,7 @@ else
fi
fi
# run prefill worker
# run prefill worker
CUDA_VISIBLE_DEVICES
=
$PREFILL_CUDA_VISIBLE_DEVICES
python3
components/worker.py
\
CUDA_VISIBLE_DEVICES
=
$PREFILL_CUDA_VISIBLE_DEVICES
python3
-m
dynamo.trtllm
\
--model-path
"
$MODEL_PATH
"
\
--model-path
"
$MODEL_PATH
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--extra-engine-args
"
$PREFILL_ENGINE_ARGS
"
\
--extra-engine-args
"
$PREFILL_ENGINE_ARGS
"
\
...
@@ -47,7 +47,7 @@ CUDA_VISIBLE_DEVICES=$PREFILL_CUDA_VISIBLE_DEVICES python3 components/worker.py
...
@@ -47,7 +47,7 @@ CUDA_VISIBLE_DEVICES=$PREFILL_CUDA_VISIBLE_DEVICES python3 components/worker.py
PREFILL_PID
=
$!
PREFILL_PID
=
$!
# run decode worker
# run decode worker
CUDA_VISIBLE_DEVICES
=
$DECODE_CUDA_VISIBLE_DEVICES
python3
components/worker.py
\
CUDA_VISIBLE_DEVICES
=
$DECODE_CUDA_VISIBLE_DEVICES
python3
-m
dynamo.trtllm
\
--model-path
"
$MODEL_PATH
"
\
--model-path
"
$MODEL_PATH
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--extra-engine-args
"
$DECODE_ENGINE_ARGS
"
\
--extra-engine-args
"
$DECODE_ENGINE_ARGS
"
\
...
...
components/backends/trtllm/multinode/start_frontend_services.sh
View file @
5f179186
...
@@ -13,4 +13,4 @@ sleep 3
...
@@ -13,4 +13,4 @@ sleep 3
# Start OpenAI Frontend which will dynamically discover workers when they startup
# Start OpenAI Frontend which will dynamically discover workers when they startup
# NOTE: This is a blocking call.
# NOTE: This is a blocking call.
dynamo-run
in
=
http
out
=
dyn
--http-port
8000
python3
-m
dynamo.frontend
--http-port
8000
components/backends/trtllm/multinode/start_trtllm_worker.sh
View file @
5f179186
...
@@ -39,7 +39,7 @@ if [[ -n ${DISAGGREGATION_STRATEGY} ]]; then
...
@@ -39,7 +39,7 @@ if [[ -n ${DISAGGREGATION_STRATEGY} ]]; then
fi
fi
trtllm-llmapi-launch
\
trtllm-llmapi-launch
\
python3
/mnt/components/worker.py
\
python3
-m
dynamo.trtllm
\
--model-path
"
${
MODEL_PATH
}
"
\
--model-path
"
${
MODEL_PATH
}
"
\
--served-model-name
"
${
SERVED_MODEL_NAME
}
"
\
--served-model-name
"
${
SERVED_MODEL_NAME
}
"
\
--extra-engine-args
"
${
ENGINE_CONFIG
}
"
\
--extra-engine-args
"
${
ENGINE_CONFIG
}
"
\
...
...
components/backends/trtllm/src/dynamo/trtllm/__init__.py
0 → 100644
View file @
5f179186
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
components/backends/trtllm/src/dynamo/trtllm/__main__.py
0 → 100644
View file @
5f179186
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from
dynamo.trtllm.main
import
main
if
__name__
==
"__main__"
:
main
()
components/backends/trtllm/
components/worker
.py
→
components/backends/trtllm/
src/dynamo/trtllm/main
.py
View file @
5f179186
...
@@ -3,10 +3,8 @@
...
@@ -3,10 +3,8 @@
import
asyncio
import
asyncio
import
logging
import
logging
import
os
import
signal
import
signal
import
sys
import
sys
from
typing
import
TYPE_CHECKING
import
uvloop
import
uvloop
from
tensorrt_llm
import
SamplingParams
from
tensorrt_llm
import
SamplingParams
...
@@ -21,48 +19,16 @@ from dynamo.llm import (
...
@@ -21,48 +19,16 @@ from dynamo.llm import (
)
)
from
dynamo.runtime
import
DistributedRuntime
,
dynamo_worker
from
dynamo.runtime
import
DistributedRuntime
,
dynamo_worker
from
dynamo.runtime.logging
import
configure_dynamo_logging
from
dynamo.runtime.logging
import
configure_dynamo_logging
from
dynamo.trtllm.utils.request_handlers.handlers
import
(
if
TYPE_CHECKING
:
from
utils.trtllm_utils
import
Config
def
_setup_path_and_imports
():
"""Setup path and import utils modules"""
# Add the parent directory to the Python path so we can import utils
parent_dir
=
os
.
path
.
abspath
(
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
".."
))
if
parent_dir
not
in
sys
.
path
:
sys
.
path
.
insert
(
0
,
parent_dir
)
from
utils.request_handlers.handlers
import
(
RequestHandlerConfig
,
RequestHandlerFactory
,
)
from
utils.trtllm_utils
import
(
Config
,
cmd_line_args
,
is_first_worker
,
parse_endpoint
,
)
return
(
RequestHandlerConfig
,
RequestHandlerFactory
,
Config
,
cmd_line_args
,
is_first_worker
,
parse_endpoint
,
)
# Import utils modules
(
RequestHandlerConfig
,
RequestHandlerConfig
,
RequestHandlerFactory
,
RequestHandlerFactory
,
)
from
dynamo.trtllm.utils.trtllm_utils
import
(
Config
,
Config
,
cmd_line_args
,
cmd_line_args
,
is_first_worker
,
is_first_worker
,
parse_endpoint
,
parse_endpoint
,
)
=
_setup_path_and_imports
()
)
# Default buffer size for kv cache events.
# Default buffer size for kv cache events.
DEFAULT_KV_EVENT_BUFFER_MAX_SIZE
=
1024
DEFAULT_KV_EVENT_BUFFER_MAX_SIZE
=
1024
...
@@ -205,6 +171,9 @@ async def init(runtime: DistributedRuntime, config: Config):
...
@@ -205,6 +171,9 @@ async def init(runtime: DistributedRuntime, config: Config):
await
endpoint
.
serve_endpoint
(
handler
.
generate
)
await
endpoint
.
serve_endpoint
(
handler
.
generate
)
def
main
():
uvloop
.
run
(
worker
())
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
uvloop
.
install
()
main
()
asyncio
.
run
(
worker
())
components/backends/trtllm/utils/disagg_utils.py
→
components/backends/trtllm/
src/dynamo/trtllm/
utils/disagg_utils.py
View file @
5f179186
File moved
components/backends/trtllm/utils/request_handlers/handler_base.py
→
components/backends/trtllm/
src/dynamo/trtllm/
utils/request_handlers/handler_base.py
View file @
5f179186
...
@@ -19,11 +19,14 @@ from enum import Enum
...
@@ -19,11 +19,14 @@ from enum import Enum
from
tensorrt_llm
import
SamplingParams
from
tensorrt_llm
import
SamplingParams
from
tensorrt_llm.llmapi
import
DisaggregatedParams
as
LlmDisaggregatedParams
from
tensorrt_llm.llmapi
import
DisaggregatedParams
as
LlmDisaggregatedParams
from
utils.disagg_utils
import
DisaggregatedParams
,
DisaggregatedParamsCodec
from
dynamo.llm.tensorrtllm.engine
import
TensorRTLLMEngine
from
dynamo.llm.tensorrtllm.engine
import
TensorRTLLMEngine
from
dynamo.llm.tensorrtllm.publisher
import
Publisher
from
dynamo.llm.tensorrtllm.publisher
import
Publisher
from
dynamo.runtime.logging
import
configure_dynamo_logging
from
dynamo.runtime.logging
import
configure_dynamo_logging
from
dynamo.trtllm.utils.disagg_utils
import
(
DisaggregatedParams
,
DisaggregatedParamsCodec
,
)
configure_dynamo_logging
()
configure_dynamo_logging
()
...
...
components/backends/trtllm/utils/request_handlers/handlers.py
→
components/backends/trtllm/
src/dynamo/trtllm/
utils/request_handlers/handlers.py
View file @
5f179186
...
@@ -3,7 +3,7 @@
...
@@ -3,7 +3,7 @@
import
copy
import
copy
from
utils.request_handlers.handler_base
import
(
from
dynamo.trtllm.
utils.request_handlers.handler_base
import
(
DisaggregationMode
,
DisaggregationMode
,
DisaggregationStrategy
,
DisaggregationStrategy
,
HandlerBase
,
HandlerBase
,
...
...
components/backends/trtllm/utils/trtllm_utils.py
→
components/backends/trtllm/
src/dynamo/trtllm/
utils/trtllm_utils.py
View file @
5f179186
...
@@ -4,14 +4,14 @@
...
@@ -4,14 +4,14 @@
import
argparse
import
argparse
from
typing
import
Optional
from
typing
import
Optional
from
utils.request_handlers.handler_base
import
(
from
dynamo.trtllm.
utils.request_handlers.handler_base
import
(
DisaggregationMode
,
DisaggregationMode
,
DisaggregationStrategy
,
DisaggregationStrategy
,
)
)
# Default endpoint for the next worker.
# Default endpoint for the next worker.
DEFAULT_ENDPOINT
=
"dyn://dynamo.tensorrt_llm.generate"
DEFAULT_ENDPOINT
=
"dyn://dynamo.tensorrt_llm.generate"
DEFAULT_MODEL_PATH
=
"TinyLlama
-1.1B-Instruct
"
DEFAULT_MODEL_PATH
=
"TinyLlama
/TinyLlama-1.1B-Chat-v1.0
"
DEFAULT_NEXT_ENDPOINT
=
"dyn://dynamo.tensorrt_llm_next.generate"
DEFAULT_NEXT_ENDPOINT
=
"dyn://dynamo.tensorrt_llm_next.generate"
DEFAULT_DISAGGREGATION_STRATEGY
=
DisaggregationStrategy
.
DECODE_FIRST
DEFAULT_DISAGGREGATION_STRATEGY
=
DisaggregationStrategy
.
DECODE_FIRST
DEFAULT_DISAGGREGATION_MODE
=
DisaggregationMode
.
AGGREGATED
DEFAULT_DISAGGREGATION_MODE
=
DisaggregationMode
.
AGGREGATED
...
@@ -123,7 +123,7 @@ def cmd_line_args():
...
@@ -123,7 +123,7 @@ def cmd_line_args():
parser
.
add_argument
(
parser
.
add_argument
(
"--publish-events-and-metrics"
,
"--publish-events-and-metrics"
,
action
=
"store_true"
,
action
=
"store_true"
,
help
=
"
P
ublish events and metrics to the dynamo components.
Note: This is not supported when running in prefill disaggregation mode.
"
,
help
=
"
If set, p
ublish events and metrics to the dynamo components."
,
)
)
parser
.
add_argument
(
parser
.
add_argument
(
"--disaggregation-mode"
,
"--disaggregation-mode"
,
...
...
pyproject.toml
View file @
5f179186
...
@@ -79,7 +79,7 @@ requires = ["hatchling"]
...
@@ -79,7 +79,7 @@ requires = ["hatchling"]
build-backend
=
"hatchling.build"
build-backend
=
"hatchling.build"
[tool.hatch.build.targets.wheel]
[tool.hatch.build.targets.wheel]
packages
=
[
"deploy/sdk/src/dynamo"
,
"components/planner/src/dynamo"
,
"components/frontend/src/dynamo"
,
"components/backends/llama_cpp/src/dynamo"
,
"components/backends/mocker/src/dynamo"
]
packages
=
[
"deploy/sdk/src/dynamo"
,
"components/planner/src/dynamo"
,
"components/frontend/src/dynamo"
,
"components/backends/llama_cpp/src/dynamo"
,
"components/backends/mocker/src/dynamo"
,
"components/backends/trtllm/src/dynamo"
]
# This section is for including the binaries in the wheel package
# This section is for including the binaries in the wheel package
# but doesn't make them executable scripts in the venv bin directory
# but doesn't make them executable scripts in the venv bin directory
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment