Unverified commit 4bab50a6 authored by Xinyuan Tong, committed by GitHub

Fix llama4 vision (#7840)


Signed-off-by: Xinyuan Tong <justinning0323@outlook.com>
parent 2e7ab862
@@ -935,6 +935,19 @@ register_conv_template(
     )
 )
+
+register_conv_template(
+    Conversation(
+        name="llama_4_vision",
+        system_message="You are a helpful language and vision assistant. You are able to understand the visual content that the user provides, and assist the user with a variety of tasks using natural language.",
+        system_template="<|header_start|>system<|header_end|>\n\n{system_message}<|eot|>",
+        roles=("user", "assistant"),
+        sep_style=SeparatorStyle.LLAMA4,
+        sep="",
+        stop_str="<|eot|>",
+        image_token="<|image|>",
+    )
+)
 
 
 @register_conv_template_matching_function
 def match_internvl(model_path: str):
@@ -943,9 +956,11 @@ def match_internvl(model_path: str):
 
 
 @register_conv_template_matching_function
-def match_llama_3_vision(model_path: str):
+def match_llama_vision(model_path: str):
     if re.search(r"llama.*3\.2.*vision", model_path, re.IGNORECASE):
         return "llama_3_vision"
+    if re.search(r"llama.*4.*", model_path, re.IGNORECASE):
+        return "llama_4_vision"
 
 
 @register_conv_template_matching_function
...
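For reference, the matching change above can be exercised on its own with plain `re`; `pick_template` is a hypothetical stand-in for the registered matcher, and the model paths are only illustrative:

    import re

    def pick_template(model_path: str):
        # Mirrors match_llama_vision above: Llama 3.2 vision checkpoints keep the
        # existing llama_3_vision template, while anything that looks like Llama 4
        # gets the new llama_4_vision template.
        if re.search(r"llama.*3\.2.*vision", model_path, re.IGNORECASE):
            return "llama_3_vision"
        if re.search(r"llama.*4.*", model_path, re.IGNORECASE):
            return "llama_4_vision"
        return None

    print(pick_template("meta-llama/Llama-3.2-11B-Vision-Instruct"))   # llama_3_vision
    print(pick_template("meta-llama/Llama-4-Scout-17B-16E-Instruct"))  # llama_4_vision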
@@ -248,7 +248,9 @@ def _get_chunked_prefill_embedding(
 ) -> Optional[torch.Tensor]:
     # Calculate embedding for each request, try to get it from cache to avoid repeated calculation
     embedding_list = []
-    for i in range(len(items_size) - 1):
+    # FIXME(Xinyuan): temporary workaround for eagle3, which may have len(items_size) > len(prefix_length)
+    max_iterations = min(len(items_size) - 1, len(prefix_length))
+    for i in range(max_iterations):
         if items_size[i] == items_size[i + 1]:
             continue
         embedding_items_per_req = embedding_items[items_size[i] : items_size[i + 1]]
@@ -269,7 +271,7 @@ def _get_chunked_prefill_embedding(
         embedding_per_req_chunk, _, end_index = get_embedding_chunk(
             embedding=embedding_per_req,
             extend_prefix_len=prefix_length[i],
-            extend_seq_len=extend_length[i],
+            extend_seq_len=extend_length[i] if i < len(extend_length) else 0,
             items_offset=items_offset,
         )
         # remove this item from cache if chunk reaches to the end
...
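The loop change above guards against the eagle3 case called out in the FIXME, where `items_size` can have more boundaries than `prefix_length` has entries. A minimal sketch with made-up lengths shows how clamping the iteration count and the `extend_length` lookup avoids an IndexError:

    # Hypothetical shapes: one more request boundary than per-request lengths.
    items_size = [0, 2, 2, 5, 7]   # item boundaries (5 entries -> 4 requests)
    prefix_length = [16, 0, 32]    # only 3 entries
    extend_length = [8, 4]         # only 2 entries

    max_iterations = min(len(items_size) - 1, len(prefix_length))  # 3 instead of 4
    for i in range(max_iterations):
        if items_size[i] == items_size[i + 1]:
            continue  # request i carries no multimodal items
        seq_len = extend_length[i] if i < len(extend_length) else 0
        print(i, prefix_length[i], seq_len)  # prints (0, 16, 8) and (2, 32, 0)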
@@ -60,7 +60,9 @@ class Mllama4ImageProcessor(BaseMultimodalProcessor):
         )
 
         # Handle image resolutions and aspect ratios
-        if "pixel_values" in processor_output:
+        if "pixel_values" not in processor_output:  # no image processed
+            return None
+
         image_processor = processor.image_processor
         tokenizer = self._processor.tokenizer
...
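The processor change is an early return for text-only requests, where the processor output contains no `pixel_values`. A standalone sketch of the same guard; `handle_output` and the dicts are made up for illustration:

    def handle_output(processor_output: dict):
        # Without "pixel_values" there is no image to handle, so bail out
        # before touching the image processor.
        if "pixel_values" not in processor_output:  # no image processed
            return None
        # ... image resolution / aspect-ratio handling would follow here ...
        return processor_output["pixel_values"]

    print(handle_output({"input_ids": [1, 2, 3]}))                         # None
    print(handle_output({"input_ids": [1, 2], "pixel_values": "pixels"}))  # pixels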