Unverified Commit 9df152bb authored by Isotr0py's avatar Isotr0py Committed by GitHub
Browse files

[Misc] Align Qwen3-VL-embedding image example outputs with HF repo example (#33419)


Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
parent 876a16f4
...@@ -12,6 +12,8 @@ on HuggingFace model repository. ...@@ -12,6 +12,8 @@ on HuggingFace model repository.
import argparse import argparse
from dataclasses import asdict from dataclasses import asdict
from PIL.Image import Image
from vllm import LLM, EngineArgs from vllm import LLM, EngineArgs
from vllm.multimodal.utils import fetch_image from vllm.multimodal.utils import fetch_image
...@@ -20,17 +22,42 @@ text = "A cat standing in the snow." ...@@ -20,17 +22,42 @@ text = "A cat standing in the snow."
multi_modal_data = {"image": fetch_image(image_url)} multi_modal_data = {"image": fetch_image(image_url)}
def print_embeddings(embeds: list[float]):
    """Print a short preview of an embedding vector together with its size.

    Vectors with more than four elements are shown as their first four
    values followed by ``...``; shorter vectors are printed in full.

    Args:
        embeds: The embedding values to display.
    """
    if len(embeds) > 4:
        # Drop the closing bracket of the 4-element repr and append an
        # ellipsis so the output reads like a truncated list.
        embeds_trimmed = str(embeds[:4])[:-1] + ", ...]"
    else:
        embeds_trimmed = embeds
    print(f"Embeddings: {embeds_trimmed} (size={len(embeds)})")
def run_qwen3_vl(): def run_qwen3_vl():
try:
from qwen_vl_utils import smart_resize
except ModuleNotFoundError:
print(
"WARNING: `qwen-vl-utils` not installed, input images will not "
"be automatically resized. This can cause different results "
"comparing with HF repo's example. "
"You can enable this functionality by `pip install qwen-vl-utils`."
)
smart_resize = None
if smart_resize is not None:
def post_process_image(image: Image) -> Image:
width, height = image.size
resized_height, resized_width = smart_resize(
height,
width,
factor=32,
)
return image.resize((resized_width, resized_height))
multi_modal_data["image"] = post_process_image(multi_modal_data["image"])
engine_args = EngineArgs( engine_args = EngineArgs(
model="Qwen/Qwen3-VL-Embedding-2B", model="Qwen/Qwen3-VL-Embedding-2B",
runner="pooling", runner="pooling",
max_model_len=8192, max_model_len=8192,
limit_mm_per_prompt={"image": 1}, limit_mm_per_prompt={"image": 1},
mm_processor_kwargs={"do_resize": False} if smart_resize is not None else None,
) )
default_instruction = "Represent the user's input." default_instruction = "Represent the user's input."
image_placeholder = "<|vision_start|><|image_pad|><|vision_end|>" image_placeholder = "<|vision_start|><|image_pad|><|vision_end|>"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment