Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
eff08aed
Unverified
Commit
eff08aed
authored
Feb 04, 2026
by
Indrajit Bhosale
Committed by
GitHub
Feb 04, 2026
Browse files
fix: Wrap default_multimodal_input_loader in asyncio.to_thread (#5945)
parent
f3bea5d5
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
27 additions
and
21 deletions
+27
-21
components/src/dynamo/trtllm/encode_helper.py
components/src/dynamo/trtllm/encode_helper.py
+12
-9
components/src/dynamo/trtllm/multimodal_processor.py
components/src/dynamo/trtllm/multimodal_processor.py
+15
-12
No files found.
components/src/dynamo/trtllm/encode_helper.py
View file @
eff08aed
...
...
@@ -298,15 +298,18 @@ class EncodeHelper:
# for tokenizer loading. `model_type` is needed to retrieve the correct
# multimodal placeholders and apply model-specific preprocessing.
# Pass tokenizer to reuse the pre-initialized tokenizer instead of
# creating a new one per request
inputs
=
default_multimodal_input_loader
(
tokenizer
=
tokenizer
,
model_dir
=
model_dir
,
model_type
=
model_type
,
modality
=
"image"
,
prompts
=
[
text_prompt
],
media
=
image_urls
[
0
],
# NOTE: default_multimodal_input_loader downloads images and preprocesses them
# synchronously. Wrap in asyncio.to_thread to allow concurrent image loading
# across multiple requests, improving throughput at high concurrency.
inputs
=
await
asyncio
.
to_thread
(
lambda
:
default_multimodal_input_loader
(
tokenizer
=
tokenizer
,
model_dir
=
model_dir
,
model_type
=
model_type
,
modality
=
"image"
,
prompts
=
[
text_prompt
],
media
=
image_urls
[
0
],
)
)
# NOTE: MultimodalEncoder.generate() is synchronous. Run it off-thread to avoid
...
...
components/src/dynamo/trtllm/multimodal_processor.py
View file @
eff08aed
...
...
@@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
asyncio
import
logging
import
time
from
io
import
BytesIO
...
...
@@ -211,18 +212,20 @@ class MultimodalRequestProcessor:
]
logging
.
info
(
f
"Using embedding paths:
{
embedding_paths
}
"
)
# Process with default_multimodal_input_loader
# Pass self.tokenizer to reuse the pre-initialized tokenizer instead of
# creating a new one per request
processed_inputs
=
default_multimodal_input_loader
(
tokenizer
=
self
.
tokenizer
,
model_dir
=
self
.
model_dir
,
model_type
=
self
.
model_type
,
modality
=
self
.
modality
,
prompts
=
[
text_prompt
],
image_data_format
=
"pt"
,
device
=
"cuda"
,
**
loader_kwargs
,
# NOTE: default_multimodal_input_loader downloads images and preprocesses them
# synchronously. Wrap in asyncio.to_thread to allow concurrent image loading
# across multiple requests, improving throughput at high concurrency.
processed_inputs
=
await
asyncio
.
to_thread
(
lambda
:
default_multimodal_input_loader
(
tokenizer
=
self
.
tokenizer
,
model_dir
=
self
.
model_dir
,
model_type
=
self
.
model_type
,
modality
=
self
.
modality
,
prompts
=
[
text_prompt
],
image_data_format
=
"pt"
,
device
=
"cuda"
,
**
loader_kwargs
,
)
)
# Return the first processed input if available
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment