Unverified commit a21cd9ed, authored by Isotr0py and committed by GitHub
Browse files

[Bugfix] Fix incorrect `image_grid_thw` rank for HunyuanOCR from missing...


[Bugfix] Fix incorrect `image_grid_thw` rank for HunyuanOCR from missing `merge_by_field_config=True` (#29950)
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
parent 7fe9c1a2
......@@ -309,6 +309,28 @@ def load_h2ovl(question: str, image_urls: list[str]) -> ModelRequestData:
)
# HunyuanOCR
def load_hunyuan_vl(question: str, image_urls: list[str]) -> ModelRequestData:
    """Assemble the request data for the ``tencent/HunyuanOCR`` example.

    Args:
        question: Text inserted into the prompt after the image placeholders.
        image_urls: URLs of the images to fetch; the per-prompt image limit
            is set to the number of URLs.

    Returns:
        A ``ModelRequestData`` holding the engine arguments, the prompt and
        the fetched images.
    """
    num_images = len(image_urls)

    engine_args = EngineArgs(
        model="tencent/HunyuanOCR",
        max_model_len=8192,
        limit_mm_per_prompt={"image": num_images},
    )

    # One placeholder group is repeated per image, ahead of the question.
    image_tokens = num_images * (
        "<|hy_place▁holder▁no▁100|><|hy_place▁holder▁no▁102|><|hy_place▁holder▁no▁101|>"  # noqa: E501
    )
    prompt = f"<|hy_begin▁of▁sentence|>{image_tokens}{question}<|hy_User|>"

    images = [fetch_image(url) for url in image_urls]
    return ModelRequestData(
        engine_args=engine_args,
        prompt=prompt,
        image_data=images,
    )
def load_hyperclovax_seed_vision(
question: str, image_urls: list[str]
) -> ModelRequestData:
......@@ -1322,6 +1344,7 @@ model_example_map = {
"deepseek_ocr": load_deepseek_ocr,
"gemma3": load_gemma3,
"h2ovl_chat": load_h2ovl,
"hunyuan_vl": load_hunyuan_vl,
"hyperclovax_seed_vision": load_hyperclovax_seed_vision,
"idefics3": load_idefics3,
"interns1": load_interns1,
......
......@@ -785,6 +785,7 @@ class HunYuanVLForConditionalGeneration(
SupportsQuant,
SupportsXDRoPE,
):
merge_by_field_config = True
multimodal_cpu_fields = {"image_grid_thw"}
# To ensure correct weight loading and mapping.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment