Unverified commit 55e6d3d5, authored by Raushan Turganbay and committed by GitHub
Browse files

[Bugfix] Make siglip/clip compatible with transformers v5 (#37200)


Signed-off-by: raushan <raushan@huggingface.co>
parent 6682c231
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest import pytest
import torch
from transformers import CLIPModel from transformers import CLIPModel
from ....conftest import IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner from ....conftest import IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner
...@@ -50,13 +51,16 @@ def _run_test( ...@@ -50,13 +51,16 @@ def _run_test(
if "pixel_values" in inputs: if "pixel_values" in inputs:
pooled_output = hf_model.model.get_image_features( pooled_output = hf_model.model.get_image_features(
pixel_values=inputs.pixel_values, pixel_values=inputs.pixel_values,
).squeeze(0) )
else: else:
pooled_output = hf_model.model.get_text_features( pooled_output = hf_model.model.get_text_features(
input_ids=inputs.input_ids, input_ids=inputs.input_ids,
attention_mask=inputs.attention_mask, attention_mask=inputs.attention_mask,
).squeeze(0) )
if not isinstance(pooled_output, torch.Tensor):
pooled_output = pooled_output.pooler_output
pooled_output = pooled_output.squeeze(0)
all_outputs.append(pooled_output.tolist()) all_outputs.append(pooled_output.tolist())
hf_outputs = all_outputs hf_outputs = all_outputs
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
from typing import Any from typing import Any
import pytest import pytest
import torch
from transformers import SiglipModel from transformers import SiglipModel
from ....conftest import IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner from ....conftest import IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner
...@@ -68,12 +69,15 @@ def _run_test( ...@@ -68,12 +69,15 @@ def _run_test(
if "pixel_values" in inputs: if "pixel_values" in inputs:
pooled_output = hf_model.model.get_image_features( pooled_output = hf_model.model.get_image_features(
pixel_values=inputs.pixel_values, pixel_values=inputs.pixel_values,
).squeeze(0) )
else: else:
pooled_output = hf_model.model.get_text_features( pooled_output = hf_model.model.get_text_features(
input_ids=inputs.input_ids, input_ids=inputs.input_ids,
).squeeze(0) )
if not isinstance(pooled_output, torch.Tensor):
pooled_output = pooled_output.pooler_output
pooled_output = pooled_output.squeeze(0)
all_outputs.append(pooled_output.tolist()) all_outputs.append(pooled_output.tolist())
hf_outputs = all_outputs hf_outputs = all_outputs
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment