sglang / Commits / 4bab50a6

Fix llama4 vision (#7840)

Unverified commit 4bab50a6, authored Jul 08, 2025 by Xinyuan Tong; committed by GitHub on Jul 08, 2025.
Signed-off-by: Xinyuan Tong <justinning0323@outlook.com>
Parent: 2e7ab862
Showing 3 changed files with 82 additions and 63 deletions:
  python/sglang/srt/conversation.py                    +16  -1
  python/sglang/srt/managers/mm_utils.py                +4  -2
  python/sglang/srt/multimodal/processors/mllama4.py   +62 -60
python/sglang/srt/conversation.py
@@ -935,6 +935,19 @@ register_conv_template(
     )
 )
 
+register_conv_template(
+    Conversation(
+        name="llama_4_vision",
+        system_message="You are a helpful language and vision assistant. You are able to understand the visual content that the user provides, and assist the user with a variety of tasks using natural language.",
+        system_template="<|header_start|>system<|header_end|>\n\n{system_message}<|eot|>",
+        roles=("user", "assistant"),
+        sep_style=SeparatorStyle.LLAMA4,
+        sep="",
+        stop_str="<|eot|>",
+        image_token="<|image|>",
+    )
+)
+
 
 @register_conv_template_matching_function
 def match_internvl(model_path: str):
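For reference, the prompt this template is expected to produce can be sketched directly. This is an illustrative rendering only: it assumes SeparatorStyle.LLAMA4 wraps the user and assistant turns in the same <|header_start|>...<|header_end|> markers that system_template uses, which the diff itself does not show.

# Illustrative only: assumes SeparatorStyle.LLAMA4 formats user/assistant
# turns with the same header markers as system_template above.
system_message = (
    "You are a helpful language and vision assistant. You are able to "
    "understand the visual content that the user provides, and assist the "
    "user with a variety of tasks using natural language."
)
prompt = (
    f"<|header_start|>system<|header_end|>\n\n{system_message}<|eot|>"
    "<|header_start|>user<|header_end|>\n\n<|image|>Describe this image.<|eot|>"
    "<|header_start|>assistant<|header_end|>\n\n"
)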
@@ -943,9 +956,11 @@ def match_internvl(model_path: str):
 
 
 @register_conv_template_matching_function
-def match_llama_3_vision(model_path: str):
+def match_llama_vision(model_path: str):
     if re.search(r"llama.*3\.2.*vision", model_path, re.IGNORECASE):
         return "llama_3_vision"
+    if re.search(r"llama.*4.*", model_path, re.IGNORECASE):
+        return "llama_4_vision"
 
 
 @register_conv_template_matching_function
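The renamed matcher can be exercised in isolation. A minimal sketch: the function body is taken from the diff, while the model paths below are chosen purely as examples.

import re

def match_llama_vision(model_path: str):
    # Llama 3.2 vision checkpoints keep the existing template; any path
    # mentioning Llama 4 now resolves to the template registered above.
    if re.search(r"llama.*3\.2.*vision", model_path, re.IGNORECASE):
        return "llama_3_vision"
    if re.search(r"llama.*4.*", model_path, re.IGNORECASE):
        return "llama_4_vision"
    return None  # fall through so other registered matchers can try

assert match_llama_vision("meta-llama/Llama-3.2-11B-Vision-Instruct") == "llama_3_vision"
assert match_llama_vision("meta-llama/Llama-4-Scout-17B-16E-Instruct") == "llama_4_vision"
assert match_llama_vision("Qwen/Qwen2.5-VL-7B-Instruct") is None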
python/sglang/srt/managers/mm_utils.py
@@ -248,7 +248,9 @@ def _get_chunked_prefill_embedding(
 ) -> Optional[torch.Tensor]:
     # Calculate embedding for each request, try to get it from cache to avoid repeated calculation
     embedding_list = []
-    for i in range(len(items_size) - 1):
+    # FIXME(Xinyuan): temporary workaround for eagle3, which may have len(items_size) > len(prefix_length)
+    max_iterations = min(len(items_size) - 1, len(prefix_length))
+    for i in range(max_iterations):
         if items_size[i] == items_size[i + 1]:
             continue
         embedding_items_per_req = embedding_items[items_size[i] : items_size[i + 1]]
@@ -269,7 +271,7 @@ def _get_chunked_prefill_embedding(
         embedding_per_req_chunk, _, end_index = get_embedding_chunk(
             embedding=embedding_per_req,
             extend_prefix_len=prefix_length[i],
-            extend_seq_len=extend_length[i],
+            extend_seq_len=extend_length[i] if i < len(extend_length) else 0,
             items_offset=items_offset,
         )
         # remove this item from cache if chunk reaches to the end
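Both hunks apply the same defensive pattern: clamp the loop to the shorter of the two per-request lists, and treat a missing extend length as 0. A minimal self-contained sketch of that pattern, with made-up values standing in for real scheduler state:

# Made-up stand-ins for real scheduler state, for illustration only.
items_size = [0, 2, 2, 5]   # prefix sums of multimodal items per request
prefix_length = [16, 32]    # may be shorter than items_size under eagle3
extend_length = [8]         # may also be shorter than the loop range

# Clamp the iteration count so prefix_length[i] can never be indexed
# out of range when items_size carries extra speculative entries.
max_iterations = min(len(items_size) - 1, len(prefix_length))
for i in range(max_iterations):
    if items_size[i] == items_size[i + 1]:
        continue  # this request contributed no multimodal items
    seq_len = extend_length[i] if i < len(extend_length) else 0
    print(f"request {i}: prefix={prefix_length[i]}, extend={seq_len}")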
python/sglang/srt/multimodal/processors/mllama4.py
@@ -60,7 +60,9 @@ class Mllama4ImageProcessor(BaseMultimodalProcessor):
         )
 
         # Handle image resolutions and aspect ratios
-        if "pixel_values" in processor_output:
-            image_processor = processor.image_processor
-            tokenizer = self._processor.tokenizer
+        if "pixel_values" not in processor_output:
+            # no image processed
+            return None
+
+        image_processor = processor.image_processor
+        tokenizer = self._processor.tokenizer
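The mllama4 change is a guard-clause refactor: rather than nesting all image handling under the "pixel_values" check, the method now returns None early for text-only inputs, and the rest of the body is dedented one level, which likely accounts for most of the +62/-60 churn. A schematic sketch of the shape of the refactor; process_mm_data and build_mm_items are hypothetical names, not the actual sglang methods:

from typing import Any, Optional

def process_mm_data(processor_output: dict) -> Optional[dict]:
    # Guard clause: bail out before touching any image state.
    if "pixel_values" not in processor_output:
        # no image processed
        return None

    # Everything below used to be nested inside
    # `if "pixel_values" in processor_output:` and is now one level flatter.
    processor_output["mm_items"] = build_mm_items(processor_output["pixel_values"])
    return processor_output

def build_mm_items(pixel_values: Any) -> list:
    # Placeholder for the real resolution / aspect-ratio handling.
    return [pixel_values]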