Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
60700899
Unverified
Commit
60700899
authored
Jan 21, 2026
by
Ayush Agarwal
Committed by
GitHub
Jan 21, 2026
Browse files
fix: multi image bug for qwen ec connector (#5514)
Signed-off-by:
ayushag
<
ayushag@nvidia.com
>
parent
fdd2d15e
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
35 additions
and
37 deletions
+35
-37
components/src/dynamo/vllm/multimodal_handlers/encode_worker_handler.py
.../dynamo/vllm/multimodal_handlers/encode_worker_handler.py
+35
-37
No files found.
components/src/dynamo/vllm/multimodal_handlers/encode_worker_handler.py
View file @
60700899
...
...
@@ -276,19 +276,16 @@ class VLLMEncodeWorkerHandler:
f
"for request_id=
{
request
.
request_id
}
"
)
# Process each multimodal input
# Load all images
# TODO: support video and audio encoding later
media_list
=
[]
modality
=
"image"
for
idx
,
mm_group
in
enumerate
(
request
.
multimodal_inputs
):
mm_input
=
mm_group
.
multimodal_input
item_request_id
=
f
"
{
request
.
request_id
}
_mm_
{
idx
}
"
# Load media (image/video/audio)
# TODO: Add support for video_url and audio
if
mm_input
.
image_url
:
media
=
await
self
.
image_loader
.
load_image
(
mm_input
.
image_url
)
media_key
=
"image"
modality
=
"image"
media_list
.
append
(
media
)
elif
mm_input
.
video_url
:
# TODO: Implement video loading
raise
NotImplementedError
(
"Video encoding not yet supported"
)
else
:
raise
ValueError
(
...
...
@@ -296,26 +293,17 @@ class VLLMEncodeWorkerHandler:
"Specify image_url or video_url."
)
# Compute mm_hash using vLLM's hasher
try
:
mm_hash
=
MultiModalHasher
.
hash_kwargs
(
model_id
=
self
.
config
.
model
,
**
{
media_key
:
media
}
)
logger
.
debug
(
f
"[
{
item_request_id
}
] Computed mm_hash:
{
mm_hash
}
"
)
except
Exception
as
e
:
logger
.
error
(
f
"[
{
item_request_id
}
] Failed to compute mm_hash:
{
e
}
"
)
raise
try
:
# Process all images in one vLLM request
prompt_dict
=
TokensPrompt
(
prompt_token_ids
=
request
.
token_ids
,
multi_modal_data
=
{
media_key
:
media
},
multi_modal_data
=
{
"image"
:
media
_list
},
)
try
:
gen
=
self
.
engine_client
.
generate
(
prompt
=
prompt_dict
,
sampling_params
=
SamplingParams
(
max_tokens
=
1
,
min_tokens
=
0
),
request_id
=
item_
request_id
,
request_id
=
request
.
request_id
,
)
# Consume generator to trigger encoder execution
...
...
@@ -323,16 +311,26 @@ class VLLMEncodeWorkerHandler:
pass
logger
.
info
(
f
"[
{
item_request_id
}
] Encoder execution completed "
f
"(
{
idx
+
1
}
/
{
len
(
request
.
multimodal_inputs
)
}
)"
f
"[
{
request
.
request_id
}
] Encoder execution completed for all
{
len
(
media_list
)
}
image(s)"
)
except
Exception
as
e
:
logger
.
error
(
f
"[
{
item_request_id
}
] Encoder execution failed:
{
e
}
"
)
logger
.
error
(
f
"[
{
request
.
request_id
}
] Encoder execution failed:
{
e
}
"
)
raise
# Compute mm_hash for each image and yield responses
for
idx
,
media
in
enumerate
(
media_list
):
item_request_id
=
f
"
{
request
.
request_id
}
_mm_
{
idx
}
"
try
:
mm_hash
=
MultiModalHasher
.
hash_kwargs
(
model_id
=
self
.
config
.
model
,
image
=
media
)
logger
.
debug
(
f
"[
{
item_request_id
}
] Computed mm_hash:
{
mm_hash
}
"
)
except
Exception
as
e
:
logger
.
error
(
f
"[
{
item_request_id
}
] Failed to compute mm_hash:
{
e
}
"
)
raise
# Yield metadata for each item (PD workers can use these to lookup from cache)
# Right now this is not used. Can be used for logging purpose later.
response
=
VLLMNativeEncoderResponse
(
request_id
=
item_request_id
,
mm_hash
=
mm_hash
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment