Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
222245e2
Unverified
Commit
222245e2
authored
Jul 25, 2025
by
Tanmay Verma
Committed by
GitHub
Jul 26, 2025
Browse files
refactor: Move engine and publisher from dynamo.llm.tensorrt_llm to dynamo.trtllm (#2128)
parent
4498a77d
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
23 additions
and
51 deletions
+23
-51
components/backends/trtllm/src/dynamo/trtllm/engine.py
components/backends/trtllm/src/dynamo/trtllm/engine.py
+0
-0
components/backends/trtllm/src/dynamo/trtllm/main.py
components/backends/trtllm/src/dynamo/trtllm/main.py
+6
-9
components/backends/trtllm/src/dynamo/trtllm/publisher.py
components/backends/trtllm/src/dynamo/trtllm/publisher.py
+13
-6
components/backends/trtllm/src/dynamo/trtllm/request_handlers/handler_base.py
...trtllm/src/dynamo/trtllm/request_handlers/handler_base.py
+2
-2
components/backends/trtllm/src/dynamo/trtllm/request_handlers/handlers.py
...nds/trtllm/src/dynamo/trtllm/request_handlers/handlers.py
+1
-1
components/backends/trtllm/src/dynamo/trtllm/utils/trtllm_utils.py
...s/backends/trtllm/src/dynamo/trtllm/utils/trtllm_utils.py
+1
-1
lib/bindings/python/src/dynamo/llm/__init__.py
lib/bindings/python/src/dynamo/llm/__init__.py
+0
-14
lib/bindings/python/src/dynamo/llm/tensorrtllm/__init__.py
lib/bindings/python/src/dynamo/llm/tensorrtllm/__init__.py
+0
-18
No files found.
lib/bindings/python
/src/dynamo/
llm/tensor
rtllm/engine.py
→
components/backends/trtllm
/src/dynamo/
t
rtllm/engine.py
View file @
222245e2
File moved
components/backends/trtllm/src/dynamo/trtllm/main.py
View file @
222245e2
...
...
@@ -11,15 +11,12 @@ from tensorrt_llm import SamplingParams
from
tensorrt_llm.llmapi.llm_utils
import
update_llm_args_with_extra_options
from
tensorrt_llm.llmapi.tokenizer
import
tokenizer_factory
from
dynamo.llm
import
(
ModelType
,
get_tensorrtllm_engine
,
get_tensorrtllm_publisher
,
register_llm
,
)
from
dynamo.llm
import
ModelType
,
register_llm
from
dynamo.runtime
import
DistributedRuntime
,
dynamo_worker
from
dynamo.runtime.logging
import
configure_dynamo_logging
from
dynamo.trtllm.utils.request_handlers.handlers
import
(
from
dynamo.trtllm.engine
import
get_llm_engine
from
dynamo.trtllm.publisher
import
get_publisher
from
dynamo.trtllm.request_handlers.handlers
import
(
RequestHandlerConfig
,
RequestHandlerFactory
,
)
...
...
@@ -129,7 +126,7 @@ async def init(runtime: DistributedRuntime, config: Config):
# We already detokenize inside HandlerBase. No need to also do it in TRTLLM.
default_sampling_params
.
detokenize
=
False
async
with
get_
tensorrt
llm_engine
(
engine_args
)
as
engine
:
async
with
get_llm_engine
(
engine_args
)
as
engine
:
endpoint
=
component
.
endpoint
(
config
.
endpoint
)
if
is_first_worker
(
config
):
...
...
@@ -159,7 +156,7 @@ async def init(runtime: DistributedRuntime, config: Config):
kv_listener
=
runtime
.
namespace
(
config
.
namespace
).
component
(
config
.
component
)
async
with
get_
tensorrtllm_
publisher
(
async
with
get_publisher
(
component
,
engine
,
kv_listener
,
...
...
lib/bindings/python
/src/dynamo/
llm/tensor
rtllm/publisher.py
→
components/backends/trtllm
/src/dynamo/
t
rtllm/publisher.py
View file @
222245e2
...
...
@@ -9,7 +9,7 @@ import traceback
import
weakref
from
contextlib
import
asynccontextmanager
from
queue
import
Queue
from
typing
import
Callable
,
Optional
,
Union
from
typing
import
Awaitable
,
Callable
,
Optional
,
Union
from
dynamo.llm
import
(
ForwardPassMetrics
,
...
...
@@ -41,7 +41,7 @@ class ManagedThread(threading.Thread):
def
__init__
(
self
,
task
:
Optional
[
Union
[
Callable
[...,
bool
],
weakref
.
WeakMethod
]],
task
:
Optional
[
Union
[
Callable
[...,
Awaitable
[
bool
]
]
,
weakref
.
WeakMethod
]],
error_queue
:
Optional
[
Queue
]
=
None
,
name
:
Optional
[
str
]
=
None
,
loop
:
Optional
[
asyncio
.
AbstractEventLoop
]
=
None
,
...
...
@@ -62,7 +62,9 @@ class ManagedThread(threading.Thread):
def
run
(
self
):
while
not
self
.
_stop_event
.
is_set
():
task
:
Optional
[
Union
[
Callable
[...,
bool
],
weakref
.
WeakMethod
]]
=
self
.
task
task
:
Optional
[
Union
[
Callable
[...,
Awaitable
[
bool
]],
weakref
.
WeakMethod
]
]
=
self
.
task
if
isinstance
(
task
,
weakref
.
WeakMethod
):
task
=
task
()
if
task
is
None
:
...
...
@@ -77,9 +79,14 @@ class ManagedThread(threading.Thread):
if
self
.
loop
is
None
:
logging
.
error
(
"[ManagedThread] Loop not initialized!"
)
break
self
.
_current_future
=
asyncio
.
run_coroutine_threadsafe
(
task
(
**
self
.
kwargs
),
self
.
loop
)
# Call the task function to get the coroutine
coro
=
task
(
**
self
.
kwargs
)
if
not
asyncio
.
iscoroutine
(
coro
):
logging
.
error
(
f
"Task
{
task
}
did not return a coroutine"
)
break
self
.
_current_future
=
asyncio
.
run_coroutine_threadsafe
(
coro
,
self
.
loop
)
_
=
self
.
_current_future
.
result
()
except
(
asyncio
.
CancelledError
,
concurrent
.
futures
.
CancelledError
):
logging
.
debug
(
f
"Thread
{
self
.
name
}
was cancelled"
)
...
...
components/backends/trtllm/src/dynamo/trtllm/
utils/
request_handlers/handler_base.py
→
components/backends/trtllm/src/dynamo/trtllm/request_handlers/handler_base.py
View file @
222245e2
...
...
@@ -20,9 +20,9 @@ from enum import Enum
from
tensorrt_llm
import
SamplingParams
from
tensorrt_llm.llmapi
import
DisaggregatedParams
as
LlmDisaggregatedParams
from
dynamo.llm.tensorrtllm.engine
import
TensorRTLLMEngine
from
dynamo.llm.tensorrtllm.publisher
import
Publisher
from
dynamo.runtime.logging
import
configure_dynamo_logging
from
dynamo.trtllm.engine
import
TensorRTLLMEngine
from
dynamo.trtllm.publisher
import
Publisher
from
dynamo.trtllm.utils.disagg_utils
import
(
DisaggregatedParams
,
DisaggregatedParamsCodec
,
...
...
components/backends/trtllm/src/dynamo/trtllm/
utils/
request_handlers/handlers.py
→
components/backends/trtllm/src/dynamo/trtllm/request_handlers/handlers.py
View file @
222245e2
...
...
@@ -3,7 +3,7 @@
import
copy
from
dynamo.trtllm.
utils.
request_handlers.handler_base
import
(
from
dynamo.trtllm.request_handlers.handler_base
import
(
DisaggregationMode
,
DisaggregationStrategy
,
HandlerBase
,
...
...
components/backends/trtllm/src/dynamo/trtllm/utils/trtllm_utils.py
View file @
222245e2
...
...
@@ -4,7 +4,7 @@
import
argparse
from
typing
import
Optional
from
dynamo.trtllm.
utils.
request_handlers.handler_base
import
(
from
dynamo.trtllm.request_handlers.handler_base
import
(
DisaggregationMode
,
DisaggregationStrategy
,
)
...
...
lib/bindings/python/src/dynamo/llm/__init__.py
View file @
222245e2
...
...
@@ -41,17 +41,3 @@ from dynamo._core import compute_block_hash_for_seq_py as compute_block_hash_for
from
dynamo._core
import
make_engine
from
dynamo._core
import
register_llm
as
register_llm
from
dynamo._core
import
run_input
try
:
from
dynamo.llm.tensorrtllm
import
(
# noqa: F401
get_llm_engine
as
get_tensorrtllm_engine
,
)
from
dynamo.llm.tensorrtllm
import
(
# noqa: F401
get_publisher
as
get_tensorrtllm_publisher
,
)
except
ImportError
:
pass
# TensorRTLLM is not enabled by default
except
Exception
as
e
:
# Don't let TensorRTLLM break other engines
logger
=
logging
.
getLogger
(
__name__
)
logger
.
exception
(
f
"Error importing TensorRT-LLM components:
{
e
}
"
)
lib/bindings/python/src/dynamo/llm/tensorrtllm/__init__.py
deleted
100644 → 0
View file @
4498a77d
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
.engine
import
get_llm_engine
# noqa: F401
from
.publisher
import
get_publisher
# noqa: F401
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment