Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
5f179186
"lib/bindings/python/vscode:/vscode.git/clone" did not exist on "6365a015b307418cba59d8fcfc75a6c41b7f5f41"
Unverified
Commit
5f179186
authored
Jul 18, 2025
by
Tanmay Verma
Committed by
GitHub
Jul 18, 2025
Browse files
refactor: Migrate to new UX2 for python launch (#2003)
parent
fc124360
Changes
14
Show whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
42 additions
and
62 deletions
+42
-62
components/backends/trtllm/launch/agg.sh
components/backends/trtllm/launch/agg.sh
+3
-3
components/backends/trtllm/launch/agg_router.sh
components/backends/trtllm/launch/agg_router.sh
+3
-3
components/backends/trtllm/launch/disagg.sh
components/backends/trtllm/launch/disagg.sh
+4
-4
components/backends/trtllm/launch/disagg_router.sh
components/backends/trtllm/launch/disagg_router.sh
+4
-4
components/backends/trtllm/multinode/start_frontend_services.sh
...ents/backends/trtllm/multinode/start_frontend_services.sh
+1
-1
components/backends/trtllm/multinode/start_trtllm_worker.sh
components/backends/trtllm/multinode/start_trtllm_worker.sh
+1
-1
components/backends/trtllm/src/dynamo/trtllm/__init__.py
components/backends/trtllm/src/dynamo/trtllm/__init__.py
+2
-0
components/backends/trtllm/src/dynamo/trtllm/__main__.py
components/backends/trtllm/src/dynamo/trtllm/__main__.py
+6
-0
components/backends/trtllm/src/dynamo/trtllm/main.py
components/backends/trtllm/src/dynamo/trtllm/main.py
+9
-40
components/backends/trtllm/src/dynamo/trtllm/utils/disagg_utils.py
...s/backends/trtllm/src/dynamo/trtllm/utils/disagg_utils.py
+0
-0
components/backends/trtllm/src/dynamo/trtllm/utils/request_handlers/handler_base.py
.../src/dynamo/trtllm/utils/request_handlers/handler_base.py
+4
-1
components/backends/trtllm/src/dynamo/trtllm/utils/request_handlers/handlers.py
...tllm/src/dynamo/trtllm/utils/request_handlers/handlers.py
+1
-1
components/backends/trtllm/src/dynamo/trtllm/utils/trtllm_utils.py
...s/backends/trtllm/src/dynamo/trtllm/utils/trtllm_utils.py
+3
-3
pyproject.toml
pyproject.toml
+1
-1
No files found.
components/backends/trtllm/launch/agg.sh
View file @
5f179186
...
@@ -19,12 +19,12 @@ trap cleanup EXIT INT TERM
...
@@ -19,12 +19,12 @@ trap cleanup EXIT INT TERM
# run clear_namespace
# run clear_namespace
python3 utils/clear_namespace.py
--namespace
dynamo
python3 utils/clear_namespace.py
--namespace
dynamo
# run
ingress
# run
frontend
dynamo run
in
=
http
out
=
dyn
--http-port
=
8000 &
python3
-m
dynamo.frontend
--http-port
8000 &
DYNAMO_PID
=
$!
DYNAMO_PID
=
$!
# run worker
# run worker
python3
components/worker.py
\
python3
-m
dynamo.trtllm
\
--model-path
"
$MODEL_PATH
"
\
--model-path
"
$MODEL_PATH
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--extra-engine-args
"
$AGG_ENGINE_ARGS
"
--extra-engine-args
"
$AGG_ENGINE_ARGS
"
components/backends/trtllm/launch/agg_router.sh
View file @
5f179186
...
@@ -19,12 +19,12 @@ trap cleanup EXIT INT TERM
...
@@ -19,12 +19,12 @@ trap cleanup EXIT INT TERM
# run clear_namespace
# run clear_namespace
python3 utils/clear_namespace.py
--namespace
dynamo
python3 utils/clear_namespace.py
--namespace
dynamo
# run
ingress
# run
frontend
dynamo run
in
=
http
out
=
dyn
--router-mode
kv
--http-port
=
8000 &
python3
-m
dynamo.frontend
--router-mode
kv
--http-port
8000 &
DYNAMO_PID
=
$!
DYNAMO_PID
=
$!
# run worker
# run worker
python3
components/worker.py
\
python3
-m
dynamo.trtllm
\
--model-path
"
$MODEL_PATH
"
\
--model-path
"
$MODEL_PATH
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--extra-engine-args
"
$AGG_ENGINE_ARGS
"
\
--extra-engine-args
"
$AGG_ENGINE_ARGS
"
\
...
...
components/backends/trtllm/launch/disagg.sh
View file @
5f179186
...
@@ -23,12 +23,12 @@ trap cleanup EXIT INT TERM
...
@@ -23,12 +23,12 @@ trap cleanup EXIT INT TERM
# run clear_namespace
# run clear_namespace
python3 utils/clear_namespace.py
--namespace
dynamo
python3 utils/clear_namespace.py
--namespace
dynamo
# run
ingress
# run
frontend
dynamo run
in
=
http
out
=
dyn
--http-port
=
8000 &
python3
-m
dynamo.frontend
--http-port
8000 &
DYNAMO_PID
=
$!
DYNAMO_PID
=
$!
# run prefill worker
# run prefill worker
CUDA_VISIBLE_DEVICES
=
$PREFILL_CUDA_VISIBLE_DEVICES
python3
components/worker.py
\
CUDA_VISIBLE_DEVICES
=
$PREFILL_CUDA_VISIBLE_DEVICES
python3
-m
dynamo.trtllm
\
--model-path
"
$MODEL_PATH
"
\
--model-path
"
$MODEL_PATH
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--extra-engine-args
"
$PREFILL_ENGINE_ARGS
"
\
--extra-engine-args
"
$PREFILL_ENGINE_ARGS
"
\
...
@@ -37,7 +37,7 @@ CUDA_VISIBLE_DEVICES=$PREFILL_CUDA_VISIBLE_DEVICES python3 components/worker.py
...
@@ -37,7 +37,7 @@ CUDA_VISIBLE_DEVICES=$PREFILL_CUDA_VISIBLE_DEVICES python3 components/worker.py
PREFILL_PID
=
$!
PREFILL_PID
=
$!
# run decode worker
# run decode worker
CUDA_VISIBLE_DEVICES
=
$DECODE_CUDA_VISIBLE_DEVICES
python3
components/worker.py
\
CUDA_VISIBLE_DEVICES
=
$DECODE_CUDA_VISIBLE_DEVICES
python3
-m
dynamo.trtllm
\
--model-path
"
$MODEL_PATH
"
\
--model-path
"
$MODEL_PATH
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--extra-engine-args
"
$DECODE_ENGINE_ARGS
"
\
--extra-engine-args
"
$DECODE_ENGINE_ARGS
"
\
...
...
components/backends/trtllm/launch/disagg_router.sh
View file @
5f179186
...
@@ -23,8 +23,8 @@ trap cleanup EXIT INT TERM
...
@@ -23,8 +23,8 @@ trap cleanup EXIT INT TERM
# run clear_namespace
# run clear_namespace
python3 utils/clear_namespace.py
--namespace
dynamo
python3 utils/clear_namespace.py
--namespace
dynamo
# run
ingress
# run
frontend
dynamo run
in
=
http
out
=
dyn
--router-mode
kv
--http-port
=
8000 &
python3
-m
dynamo.frontend
--router-mode
kv
--http-port
8000 &
DYNAMO_PID
=
$!
DYNAMO_PID
=
$!
...
@@ -37,7 +37,7 @@ else
...
@@ -37,7 +37,7 @@ else
fi
fi
# run prefill worker
# run prefill worker
CUDA_VISIBLE_DEVICES
=
$PREFILL_CUDA_VISIBLE_DEVICES
python3
components/worker.py
\
CUDA_VISIBLE_DEVICES
=
$PREFILL_CUDA_VISIBLE_DEVICES
python3
-m
dynamo.trtllm
\
--model-path
"
$MODEL_PATH
"
\
--model-path
"
$MODEL_PATH
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--extra-engine-args
"
$PREFILL_ENGINE_ARGS
"
\
--extra-engine-args
"
$PREFILL_ENGINE_ARGS
"
\
...
@@ -47,7 +47,7 @@ CUDA_VISIBLE_DEVICES=$PREFILL_CUDA_VISIBLE_DEVICES python3 components/worker.py
...
@@ -47,7 +47,7 @@ CUDA_VISIBLE_DEVICES=$PREFILL_CUDA_VISIBLE_DEVICES python3 components/worker.py
PREFILL_PID
=
$!
PREFILL_PID
=
$!
# run decode worker
# run decode worker
CUDA_VISIBLE_DEVICES
=
$DECODE_CUDA_VISIBLE_DEVICES
python3
components/worker.py
\
CUDA_VISIBLE_DEVICES
=
$DECODE_CUDA_VISIBLE_DEVICES
python3
-m
dynamo.trtllm
\
--model-path
"
$MODEL_PATH
"
\
--model-path
"
$MODEL_PATH
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--extra-engine-args
"
$DECODE_ENGINE_ARGS
"
\
--extra-engine-args
"
$DECODE_ENGINE_ARGS
"
\
...
...
components/backends/trtllm/multinode/start_frontend_services.sh
View file @
5f179186
...
@@ -13,4 +13,4 @@ sleep 3
...
@@ -13,4 +13,4 @@ sleep 3
# Start OpenAI Frontend which will dynamically discover workers when they startup
# Start OpenAI Frontend which will dynamically discover workers when they startup
# NOTE: This is a blocking call.
# NOTE: This is a blocking call.
dynamo-run
in
=
http
out
=
dyn
--http-port
8000
python3
-m
dynamo.frontend
--http-port
8000
components/backends/trtllm/multinode/start_trtllm_worker.sh
View file @
5f179186
...
@@ -39,7 +39,7 @@ if [[ -n ${DISAGGREGATION_STRATEGY} ]]; then
...
@@ -39,7 +39,7 @@ if [[ -n ${DISAGGREGATION_STRATEGY} ]]; then
fi
fi
trtllm-llmapi-launch
\
trtllm-llmapi-launch
\
python3
/mnt/components/worker.py
\
python3
-m
dynamo.trtllm
\
--model-path
"
${
MODEL_PATH
}
"
\
--model-path
"
${
MODEL_PATH
}
"
\
--served-model-name
"
${
SERVED_MODEL_NAME
}
"
\
--served-model-name
"
${
SERVED_MODEL_NAME
}
"
\
--extra-engine-args
"
${
ENGINE_CONFIG
}
"
\
--extra-engine-args
"
${
ENGINE_CONFIG
}
"
\
...
...
components/backends/trtllm/src/dynamo/trtllm/__init__.py
0 → 100644
View file @
5f179186
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
components/backends/trtllm/src/dynamo/trtllm/__main__.py
0 → 100644
View file @
5f179186
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from
dynamo.trtllm.main
import
main
if
__name__
==
"__main__"
:
main
()
components/backends/trtllm/
components/worker
.py
→
components/backends/trtllm/
src/dynamo/trtllm/main
.py
View file @
5f179186
...
@@ -3,10 +3,8 @@
...
@@ -3,10 +3,8 @@
import
asyncio
import
asyncio
import
logging
import
logging
import
os
import
signal
import
signal
import
sys
import
sys
from
typing
import
TYPE_CHECKING
import
uvloop
import
uvloop
from
tensorrt_llm
import
SamplingParams
from
tensorrt_llm
import
SamplingParams
...
@@ -21,48 +19,16 @@ from dynamo.llm import (
...
@@ -21,48 +19,16 @@ from dynamo.llm import (
)
)
from
dynamo.runtime
import
DistributedRuntime
,
dynamo_worker
from
dynamo.runtime
import
DistributedRuntime
,
dynamo_worker
from
dynamo.runtime.logging
import
configure_dynamo_logging
from
dynamo.runtime.logging
import
configure_dynamo_logging
from
dynamo.trtllm.utils.request_handlers.handlers
import
(
if
TYPE_CHECKING
:
from
utils.trtllm_utils
import
Config
def
_setup_path_and_imports
():
"""Setup path and import utils modules"""
# Add the parent directory to the Python path so we can import utils
parent_dir
=
os
.
path
.
abspath
(
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
".."
))
if
parent_dir
not
in
sys
.
path
:
sys
.
path
.
insert
(
0
,
parent_dir
)
from
utils.request_handlers.handlers
import
(
RequestHandlerConfig
,
RequestHandlerFactory
,
)
from
utils.trtllm_utils
import
(
Config
,
cmd_line_args
,
is_first_worker
,
parse_endpoint
,
)
return
(
RequestHandlerConfig
,
RequestHandlerFactory
,
Config
,
cmd_line_args
,
is_first_worker
,
parse_endpoint
,
)
# Import utils modules
(
RequestHandlerConfig
,
RequestHandlerConfig
,
RequestHandlerFactory
,
RequestHandlerFactory
,
)
from
dynamo.trtllm.utils.trtllm_utils
import
(
Config
,
Config
,
cmd_line_args
,
cmd_line_args
,
is_first_worker
,
is_first_worker
,
parse_endpoint
,
parse_endpoint
,
)
=
_setup_path_and_imports
()
)
# Default buffer size for kv cache events.
# Default buffer size for kv cache events.
DEFAULT_KV_EVENT_BUFFER_MAX_SIZE
=
1024
DEFAULT_KV_EVENT_BUFFER_MAX_SIZE
=
1024
...
@@ -205,6 +171,9 @@ async def init(runtime: DistributedRuntime, config: Config):
...
@@ -205,6 +171,9 @@ async def init(runtime: DistributedRuntime, config: Config):
await
endpoint
.
serve_endpoint
(
handler
.
generate
)
await
endpoint
.
serve_endpoint
(
handler
.
generate
)
def
main
():
uvloop
.
run
(
worker
())
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
uvloop
.
install
()
main
()
asyncio
.
run
(
worker
())
components/backends/trtllm/utils/disagg_utils.py
→
components/backends/trtllm/
src/dynamo/trtllm/
utils/disagg_utils.py
View file @
5f179186
File moved
components/backends/trtllm/utils/request_handlers/handler_base.py
→
components/backends/trtllm/
src/dynamo/trtllm/
utils/request_handlers/handler_base.py
View file @
5f179186
...
@@ -19,11 +19,14 @@ from enum import Enum
...
@@ -19,11 +19,14 @@ from enum import Enum
from
tensorrt_llm
import
SamplingParams
from
tensorrt_llm
import
SamplingParams
from
tensorrt_llm.llmapi
import
DisaggregatedParams
as
LlmDisaggregatedParams
from
tensorrt_llm.llmapi
import
DisaggregatedParams
as
LlmDisaggregatedParams
from
utils.disagg_utils
import
DisaggregatedParams
,
DisaggregatedParamsCodec
from
dynamo.llm.tensorrtllm.engine
import
TensorRTLLMEngine
from
dynamo.llm.tensorrtllm.engine
import
TensorRTLLMEngine
from
dynamo.llm.tensorrtllm.publisher
import
Publisher
from
dynamo.llm.tensorrtllm.publisher
import
Publisher
from
dynamo.runtime.logging
import
configure_dynamo_logging
from
dynamo.runtime.logging
import
configure_dynamo_logging
from
dynamo.trtllm.utils.disagg_utils
import
(
DisaggregatedParams
,
DisaggregatedParamsCodec
,
)
configure_dynamo_logging
()
configure_dynamo_logging
()
...
...
components/backends/trtllm/utils/request_handlers/handlers.py
→
components/backends/trtllm/
src/dynamo/trtllm/
utils/request_handlers/handlers.py
View file @
5f179186
...
@@ -3,7 +3,7 @@
...
@@ -3,7 +3,7 @@
import
copy
import
copy
from
utils.request_handlers.handler_base
import
(
from
dynamo.trtllm.
utils.request_handlers.handler_base
import
(
DisaggregationMode
,
DisaggregationMode
,
DisaggregationStrategy
,
DisaggregationStrategy
,
HandlerBase
,
HandlerBase
,
...
...
components/backends/trtllm/utils/trtllm_utils.py
→
components/backends/trtllm/
src/dynamo/trtllm/
utils/trtllm_utils.py
View file @
5f179186
...
@@ -4,14 +4,14 @@
...
@@ -4,14 +4,14 @@
import
argparse
import
argparse
from
typing
import
Optional
from
typing
import
Optional
from
utils.request_handlers.handler_base
import
(
from
dynamo.trtllm.
utils.request_handlers.handler_base
import
(
DisaggregationMode
,
DisaggregationMode
,
DisaggregationStrategy
,
DisaggregationStrategy
,
)
)
# Default endpoint for the next worker.
# Default endpoint for the next worker.
DEFAULT_ENDPOINT
=
"dyn://dynamo.tensorrt_llm.generate"
DEFAULT_ENDPOINT
=
"dyn://dynamo.tensorrt_llm.generate"
DEFAULT_MODEL_PATH
=
"TinyLlama
-1.1B-Instruct
"
DEFAULT_MODEL_PATH
=
"TinyLlama
/TinyLlama-1.1B-Chat-v1.0
"
DEFAULT_NEXT_ENDPOINT
=
"dyn://dynamo.tensorrt_llm_next.generate"
DEFAULT_NEXT_ENDPOINT
=
"dyn://dynamo.tensorrt_llm_next.generate"
DEFAULT_DISAGGREGATION_STRATEGY
=
DisaggregationStrategy
.
DECODE_FIRST
DEFAULT_DISAGGREGATION_STRATEGY
=
DisaggregationStrategy
.
DECODE_FIRST
DEFAULT_DISAGGREGATION_MODE
=
DisaggregationMode
.
AGGREGATED
DEFAULT_DISAGGREGATION_MODE
=
DisaggregationMode
.
AGGREGATED
...
@@ -123,7 +123,7 @@ def cmd_line_args():
...
@@ -123,7 +123,7 @@ def cmd_line_args():
parser
.
add_argument
(
parser
.
add_argument
(
"--publish-events-and-metrics"
,
"--publish-events-and-metrics"
,
action
=
"store_true"
,
action
=
"store_true"
,
help
=
"
P
ublish events and metrics to the dynamo components.
Note: This is not supported when running in prefill disaggregation mode.
"
,
help
=
"
If set, p
ublish events and metrics to the dynamo components."
,
)
)
parser
.
add_argument
(
parser
.
add_argument
(
"--disaggregation-mode"
,
"--disaggregation-mode"
,
...
...
pyproject.toml
View file @
5f179186
...
@@ -79,7 +79,7 @@ requires = ["hatchling"]
...
@@ -79,7 +79,7 @@ requires = ["hatchling"]
build-backend
=
"hatchling.build"
build-backend
=
"hatchling.build"
[tool.hatch.build.targets.wheel]
[tool.hatch.build.targets.wheel]
packages
=
[
"deploy/sdk/src/dynamo"
,
"components/planner/src/dynamo"
,
"components/frontend/src/dynamo"
,
"components/backends/llama_cpp/src/dynamo"
,
"components/backends/mocker/src/dynamo"
]
packages
=
[
"deploy/sdk/src/dynamo"
,
"components/planner/src/dynamo"
,
"components/frontend/src/dynamo"
,
"components/backends/llama_cpp/src/dynamo"
,
"components/backends/mocker/src/dynamo"
,
"components/backends/trtllm/src/dynamo"
]
# This section is for including the binaries in the wheel package
# This section is for including the binaries in the wheel package
# but doesn't make them executable scripts in the venv bin directory
# but doesn't make them executable scripts in the venv bin directory
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment