Commit 1ed963d4, authored by wang.yuqi, committed by khluu
Browse files

[Bugfix] Fix Qwen3-VL-Reranker load. (#33298)


Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io>
Signed-off-by: wang.yuqi <noooop@126.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
(cherry picked from commit abb34ac4)
parent 39e8b493
...@@ -18,48 +18,32 @@ e.g. ...@@ -18,48 +18,32 @@ e.g.
""" """
import argparse import argparse
import base64 import pprint
import json
import requests import requests
from vllm.multimodal.utils import encode_image_url, fetch_image
def encode_base64_content_from_url(content_url: str) -> dict[str, str]:
"""Encode a content retrieved from a remote url to base64 format."""
with requests.get(content_url, headers=headers) as response:
response.raise_for_status()
result = base64.b64encode(response.content).decode("utf-8")
return {"url": f"data:image/jpeg;base64,{result}"}
headers = {"accept": "application/json", "Content-Type": "application/json"}
query = "A woman playing with her dog on a beach at sunset." query = "A woman playing with her dog on a beach at sunset."
documents = { document = (
"content": [ "A woman shares a joyful moment with her golden retriever on a sun-drenched beach at sunset, "
{ "as the dog offers its paw in a heartwarming display of companionship and trust."
"type": "text", )
"text": ( image_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"
"A woman shares a joyful moment with her golden retriever on a sun-drenched beach at sunset, " documents = [
"as the dog offers its paw in a heartwarming display of companionship and trust." {
), "type": "text",
}, "text": document,
{ },
"type": "image_url", {
"image_url": { "type": "image_url",
"url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg" "image_url": {"url": image_url},
}, },
}, {
{ "type": "image_url",
"type": "image_url", "image_url": {"url": encode_image_url(fetch_image(image_url))},
"image_url": encode_base64_content_from_url( },
"https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg" ]
),
},
]
}
def parse_args(): def parse_args():
...@@ -74,23 +58,36 @@ def main(args): ...@@ -74,23 +58,36 @@ def main(args):
models_url = base_url + "/v1/models" models_url = base_url + "/v1/models"
rerank_url = base_url + "/rerank" rerank_url = base_url + "/rerank"
response = requests.get(models_url, headers=headers) response = requests.get(models_url)
model = response.json()["data"][0]["id"] model = response.json()["data"][0]["id"]
data = { print("Query: string & Document: list of string")
prompt = {"model": model, "query": query, "documents": [document]}
response = requests.post(rerank_url, json=prompt)
pprint.pprint(response.json())
print("Query: string & Document: text")
prompt = {"model": model, "query": query, "documents": {"content": [documents[0]]}}
response = requests.post(rerank_url, json=prompt)
pprint.pprint(response.json())
print("Query: string & Document: image url")
prompt = {
"model": model,
"query": query,
"documents": {"content": [documents[1]]},
}
response = requests.post(rerank_url, json=prompt)
pprint.pprint(response.json())
print("Query: string & Document: image base64")
prompt = {
"model": model, "model": model,
"query": query, "query": query,
"documents": documents, "documents": {"content": [documents[2]]},
} }
response = requests.post(rerank_url, headers=headers, json=data) response = requests.post(rerank_url, json=prompt)
pprint.pprint(response.json())
# Check the response
if response.status_code == 200:
print("Request successful!")
print(json.dumps(response.json(), indent=2))
else:
print(f"Request failed with status code: {response.status_code}")
print(response.text)
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -17,48 +17,32 @@ e.g. ...@@ -17,48 +17,32 @@ e.g.
""" """
import argparse import argparse
import base64
import json
import pprint import pprint
import requests import requests
from vllm.multimodal.utils import encode_image_url, fetch_image
def encode_base64_content_from_url(content_url: str) -> dict[str, str]:
"""Encode a content retrieved from a remote url to base64 format.""" query = "A woman playing with her dog on a beach at sunset."
document = (
with requests.get(content_url, headers=headers) as response: "A woman shares a joyful moment with her golden retriever on a sun-drenched beach at sunset, "
response.raise_for_status() "as the dog offers its paw in a heartwarming display of companionship and trust."
result = base64.b64encode(response.content).decode("utf-8") )
image_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"
return {"url": f"data:image/jpeg;base64,{result}"} documents = [
{
"type": "text",
headers = {"accept": "application/json", "Content-Type": "application/json"} "text": document,
},
queries = "slm markdown" {
documents = { "type": "image_url",
"content": [ "image_url": {"url": image_url},
{ },
"type": "image_url", {
"image_url": { "type": "image_url",
"url": "https://raw.githubusercontent.com/jina-ai/multimodal-reranker-test/main/handelsblatt-preview.png" "image_url": {"url": encode_image_url(fetch_image(image_url))},
}, },
}, ]
{
"type": "image_url",
"image_url": {
"url": "https://raw.githubusercontent.com/jina-ai/multimodal-reranker-test/main/paper-11.png"
},
},
{
"type": "image_url",
"image_url": encode_base64_content_from_url(
"https://raw.githubusercontent.com/jina-ai/multimodal-reranker-test/main/paper-11.png"
),
},
]
}
def parse_args(): def parse_args():
...@@ -73,15 +57,40 @@ def main(args): ...@@ -73,15 +57,40 @@ def main(args):
models_url = base_url + "/v1/models" models_url = base_url + "/v1/models"
score_url = base_url + "/score" score_url = base_url + "/score"
response = requests.get(models_url, headers=headers) response = requests.get(models_url)
model = response.json()["data"][0]["id"] model = response.json()["data"][0]["id"]
prompt = {"model": model, "queries": queries, "documents": documents} print("Query: string & Document: string")
response = requests.post(score_url, headers=headers, json=prompt) prompt = {"model": model, "queries": query, "documents": document}
print("\nPrompt when queries is string and documents is a image list:") response = requests.post(score_url, json=prompt)
pprint.pprint(prompt) pprint.pprint(response.json())
print("\nScore Response:")
print(json.dumps(response.json(), indent=2)) print("Query: string & Document: text")
prompt = {
"model": model,
"queries": query,
"documents": {"content": [documents[0]]},
}
response = requests.post(score_url, json=prompt)
pprint.pprint(response.json())
print("Query: string & Document: image url")
prompt = {
"model": model,
"queries": query,
"documents": {"content": [documents[1]]},
}
response = requests.post(score_url, json=prompt)
pprint.pprint(response.json())
print("Query: string & Document: image base64")
prompt = {
"model": model,
"queries": query,
"documents": {"content": [documents[2]]},
}
response = requests.post(score_url, json=prompt)
pprint.pprint(response.json())
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -5,9 +5,9 @@ import json ...@@ -5,9 +5,9 @@ import json
import pytest import pytest
import requests import requests
from tests.entrypoints.test_utils import encode_base64_content_from_url
from tests.utils import RemoteOpenAIServer from tests.utils import RemoteOpenAIServer
from vllm.entrypoints.pooling.classify.protocol import ClassificationResponse from vllm.entrypoints.pooling.classify.protocol import ClassificationResponse
from vllm.multimodal.utils import encode_image_url, fetch_image
MODEL_NAME = "muziyongshixin/Qwen2.5-VL-7B-for-VideoCls" MODEL_NAME = "muziyongshixin/Qwen2.5-VL-7B-for-VideoCls"
MAXIMUM_VIDEOS = 1 MAXIMUM_VIDEOS = 1
...@@ -19,7 +19,7 @@ HF_OVERRIDES = { ...@@ -19,7 +19,7 @@ HF_OVERRIDES = {
} }
input_text = "This product was excellent and exceeded my expectations" input_text = "This product was excellent and exceeded my expectations"
image_url = "https://vllm-public-assets.s3.us-west-2.amazonaws.com/multimodal_asset/cat_snow.jpg" image_url = "https://vllm-public-assets.s3.us-west-2.amazonaws.com/multimodal_asset/cat_snow.jpg"
image_base64 = encode_base64_content_from_url(image_url) image_base64 = {"url": encode_image_url(fetch_image(image_url))}
video_url = "https://www.bogotobogo.com/python/OpenCV_Python/images/mean_shift_tracking/slow_traffic_small.mp4" video_url = "https://www.bogotobogo.com/python/OpenCV_Python/images/mean_shift_tracking/slow_traffic_small.mp4"
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
import requests
from tests.utils import VLLM_PATH, RemoteOpenAIServer
from vllm.entrypoints.pooling.score.protocol import ScoreResponse
from vllm.multimodal.utils import encode_image_url, fetch_image
# Model started by the module-scoped `server` fixture and named in every
# request payload sent by the tests in this file.
MODEL_NAME = "Qwen/Qwen3-VL-Reranker-2B"
# Passed to RemoteOpenAIServer as `override_hf_configs` when the server starts.
HF_OVERRIDES = {
"architectures": ["Qwen3VLForSequenceClassification"],
"classifier_from_token": ["no", "yes"],
"is_original_qwen3_reranker": True,
}
# Query shared by the content-part tests; also reused as the text document.
query = "A cat standing in the snow."
image_url = "https://vllm-public-assets.s3.us-west-2.amazonaws.com/multimodal_asset/cat_snow.jpg"
# Content parts; each test wraps exactly one of them as {"content": [part]}.
documents = [
# [0] plain-text part
{
"type": "text",
"text": query,
},
# [1] image referenced directly by its URL
{
"type": "image_url",
"image_url": {"url": image_url},
},
# [2] the same image passed through fetch_image + encode_image_url; this call
# is evaluated when the module is imported. Used by the "image_base64" test --
# presumably yields a base64 data URL; confirm against vllm.multimodal.utils.
{
"type": "image_url",
"image_url": {"url": encode_image_url(fetch_image(image_url))},
},
]
@pytest.fixture(scope="module")
def server():
    """Yield one RemoteOpenAIServer shared by every test in this module.

    The server is created for MODEL_NAME with HF_OVERRIDES supplied as
    ``override_hf_configs`` and the Qwen3-VL reranker chat template from the
    repository's examples directory. Its context manager is exited when
    pytest finalizes the module-scoped fixture.
    """
    chat_template = (
        VLLM_PATH / "examples/pooling/score/template/qwen3_vl_reranker.jinja"
    )
    cli_args = [
        "--enforce-eager",
        "--max-model-len",
        "8192",
        "--chat-template",
        str(chat_template),
    ]
    launcher = RemoteOpenAIServer(
        MODEL_NAME, cli_args, override_hf_configs=HF_OVERRIDES
    )
    with launcher as running_server:
        yield running_server
def test_score_api_queries_str_documents_str(server: RemoteOpenAIServer):
    """Score a plain-string query against a plain-string document.

    Posts to the server's ``score`` route and checks that the parsed
    ScoreResponse has an id and exactly one data entry.
    """
    payload = {
        "model": MODEL_NAME,
        "queries": "What is the capital of France?",
        "documents": "The capital of France is Paris.",
    }
    resp = requests.post(server.url_for("score"), json=payload)
    # Fail early on any HTTP error status before trying to parse the body.
    resp.raise_for_status()

    parsed = ScoreResponse.model_validate(resp.json())
    assert parsed.id is not None
    assert parsed.data is not None
    assert len(parsed.data) == 1
def test_score_api_queries_str_documents_text_content(server: RemoteOpenAIServer):
    """Score the shared query against a single text content part.

    The document is sent as ``{"content": [documents[0]]}``; the parsed
    ScoreResponse must have an id and exactly one data entry.
    """
    text_part = documents[0]
    payload = {
        "model": MODEL_NAME,
        "queries": query,
        "documents": {"content": [text_part]},
    }
    resp = requests.post(server.url_for("score"), json=payload)
    # Fail early on any HTTP error status before trying to parse the body.
    resp.raise_for_status()

    parsed = ScoreResponse.model_validate(resp.json())
    assert parsed.id is not None
    assert parsed.data is not None
    assert len(parsed.data) == 1
def test_score_api_queries_str_documents_image_url_content(server: RemoteOpenAIServer):
    """Score the shared query against a single image-URL content part.

    The document is sent as ``{"content": [documents[1]]}``; the parsed
    ScoreResponse must have an id and exactly one data entry.
    """
    image_url_part = documents[1]
    payload = {
        "model": MODEL_NAME,
        "queries": query,
        "documents": {"content": [image_url_part]},
    }
    resp = requests.post(server.url_for("score"), json=payload)
    # Fail early on any HTTP error status before trying to parse the body.
    resp.raise_for_status()

    parsed = ScoreResponse.model_validate(resp.json())
    assert parsed.id is not None
    assert parsed.data is not None
    assert len(parsed.data) == 1
def test_score_api_queries_str_documents_image_base64_content(
    server: RemoteOpenAIServer,
):
    """Score the shared query against the pre-encoded image content part.

    The document is sent as ``{"content": [documents[2]]}``, i.e. the part
    whose URL was produced by ``encode_image_url(fetch_image(image_url))``.
    The parsed ScoreResponse must have an id and exactly one data entry.
    """
    encoded_image_part = documents[2]
    payload = {
        "model": MODEL_NAME,
        "queries": query,
        "documents": {"content": [encoded_image_part]},
    }
    resp = requests.post(server.url_for("score"), json=payload)
    # Fail early on any HTTP error status before trying to parse the body.
    resp.raise_for_status()

    parsed = ScoreResponse.model_validate(resp.json())
    assert parsed.id is not None
    assert parsed.data is not None
    assert len(parsed.data) == 1
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import base64
import requests
from vllm.entrypoints.utils import sanitize_message from vllm.entrypoints.utils import sanitize_message
...@@ -12,11 +8,3 @@ def test_sanitize_message(): ...@@ -12,11 +8,3 @@ def test_sanitize_message():
sanitize_message("<_io.BytesIO object at 0x7a95e299e750>") sanitize_message("<_io.BytesIO object at 0x7a95e299e750>")
== "<_io.BytesIO object>" == "<_io.BytesIO object>"
) )
def encode_base64_content_from_url(content_url: str) -> dict[str, str]:
with requests.get(content_url) as response:
response.raise_for_status()
result = base64.b64encode(response.content).decode("utf-8")
return {"url": f"data:image/jpeg;base64,{result}"}
...@@ -466,6 +466,7 @@ def load_weights_using_from_2_way_softmax( ...@@ -466,6 +466,7 @@ def load_weights_using_from_2_way_softmax(
language_model = _get_language_model_for_seq_cls(model) language_model = _get_language_model_for_seq_cls(model)
is_vlm = language_model is not model is_vlm = language_model is not model
using_vlm_head = is_vlm and hasattr(language_model, "score")
language_model.lm_head = ParallelLMHead( language_model.lm_head = ParallelLMHead(
text_config.vocab_size, text_config.hidden_size, quant_config=quant_config text_config.vocab_size, text_config.hidden_size, quant_config=quant_config
...@@ -506,14 +507,16 @@ def load_weights_using_from_2_way_softmax( ...@@ -506,14 +507,16 @@ def load_weights_using_from_2_way_softmax(
torch.float32 torch.float32
) - lm_head_weight.data[[false_id]].to(torch.float32) ) - lm_head_weight.data[[false_id]].to(torch.float32)
score_layer = language_model.score if is_vlm else model.score score_layer = language_model.score if using_vlm_head else model.score
param = score_layer.weight param = score_layer.weight
weight_loader = getattr(param, "weight_loader", default_weight_loader) weight_loader = getattr(param, "weight_loader", default_weight_loader)
weight_loader(param, score_weight) weight_loader(param, score_weight)
del language_model.lm_head del language_model.lm_head
score_weight_name = "language_model.score.weight" if is_vlm else "score.weight" score_weight_name = (
"language_model.score.weight" if using_vlm_head else "score.weight"
)
loaded_weights.add(score_weight_name) loaded_weights.add(score_weight_name)
lm_head_name = "lm_head.weight" lm_head_name = "lm_head.weight"
...@@ -537,6 +540,7 @@ def load_weights_no_post_processing(model, weights: Iterable[tuple[str, torch.Te ...@@ -537,6 +540,7 @@ def load_weights_no_post_processing(model, weights: Iterable[tuple[str, torch.Te
language_model = _get_language_model_for_seq_cls(model) language_model = _get_language_model_for_seq_cls(model)
is_vlm = language_model is not model is_vlm = language_model is not model
using_vlm_head = is_vlm and hasattr(language_model, "score")
language_model.lm_head = ParallelLMHead( language_model.lm_head = ParallelLMHead(
text_config.vocab_size, text_config.hidden_size, quant_config=quant_config text_config.vocab_size, text_config.hidden_size, quant_config=quant_config
...@@ -572,14 +576,16 @@ def load_weights_no_post_processing(model, weights: Iterable[tuple[str, torch.Te ...@@ -572,14 +576,16 @@ def load_weights_no_post_processing(model, weights: Iterable[tuple[str, torch.Te
token_ids = [tokenizer.convert_tokens_to_ids(t) for t in tokens] token_ids = [tokenizer.convert_tokens_to_ids(t) for t in tokens]
score_weight = language_model.lm_head.weight.data[token_ids] score_weight = language_model.lm_head.weight.data[token_ids]
score_layer = language_model.score if is_vlm else model.score score_layer = language_model.score if using_vlm_head else model.score
param = score_layer.weight param = score_layer.weight
weight_loader = getattr(param, "weight_loader", default_weight_loader) weight_loader = getattr(param, "weight_loader", default_weight_loader)
weight_loader(param, score_weight) weight_loader(param, score_weight)
del language_model.lm_head del language_model.lm_head
score_weight_name = "language_model.score.weight" if is_vlm else "score.weight" score_weight_name = (
"language_model.score.weight" if using_vlm_head else "score.weight"
)
loaded_weights.add(score_weight_name) loaded_weights.add(score_weight_name)
lm_head_name = "lm_head.weight" lm_head_name = "lm_head.weight"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment