"...git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "fa59fe417f509641fed102dfa2e3b8a63f224241"
Unverified Commit 9ae2f603 authored by Cyrus Leung's avatar Cyrus Leung Committed by GitHub
Browse files

[Misc] Various cleanups for MM input processing (#29970)


Signed-off-by: default avatarDarkLight1337 <tlleungac@connect.ust.hk>
parent 80f8af4b
...@@ -795,14 +795,12 @@ The following example demonstrates how to pass image embeddings to the OpenAI se ...@@ -795,14 +795,12 @@ The following example demonstrates how to pass image embeddings to the OpenAI se
??? code ??? code
```python ```python
from vllm.utils.serial_utils import tensor2base64
image_embedding = torch.load(...) image_embedding = torch.load(...)
grid_thw = torch.load(...) # Required by Qwen/Qwen2-VL-2B-Instruct grid_thw = torch.load(...) # Required by Qwen/Qwen2-VL-2B-Instruct
buffer = io.BytesIO() base64_image_embedding = tensor2base64(image_embedding)
torch.save(image_embedding, buffer)
buffer.seek(0)
binary_data = buffer.read()
base64_image_embedding = base64.b64encode(binary_data).decode('utf-8')
client = OpenAI( client = OpenAI(
# defaults to os.environ.get("OPENAI_API_KEY") # defaults to os.environ.get("OPENAI_API_KEY")
......
...@@ -28,13 +28,11 @@ Dependencies: ...@@ -28,13 +28,11 @@ Dependencies:
- openai - openai
""" """
import base64
import io
import torch
import transformers import transformers
from openai import OpenAI from openai import OpenAI
from vllm.utils.serial_utils import tensor2base64
def main(): def main():
client = OpenAI( client = OpenAI(
...@@ -58,11 +56,7 @@ def main(): ...@@ -58,11 +56,7 @@ def main():
prompt_embeds = embedding_layer(token_ids).squeeze(0) prompt_embeds = embedding_layer(token_ids).squeeze(0)
# Prompt embeddings # Prompt embeddings
buffer = io.BytesIO() encoded_embeds = tensor2base64(prompt_embeds)
torch.save(prompt_embeds, buffer)
buffer.seek(0)
binary_data = buffer.read()
encoded_embeds = base64.b64encode(binary_data).decode("utf-8")
completion = client.completions.create( completion = client.completions.create(
model=model_name, model=model_name,
......
...@@ -2,64 +2,47 @@ ...@@ -2,64 +2,47 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import base64 import base64
import io
import numpy as np import numpy as np
import pytest import pytest
import requests import requests
import torch import torch
from ...utils import RemoteOpenAIServer from vllm.utils.serial_utils import tensor2base64
MODEL_NAME = "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11" from ...utils import RemoteOpenAIServer
DTYPE = "float16"
def _terratorch_dummy_inputs(model_name: str): def _terratorch_dummy_messages():
pixel_values = torch.full((6, 512, 512), 1.0, dtype=torch.float16) pixel_values = torch.full((6, 512, 512), 1.0, dtype=torch.float16)
location_coords = torch.full((1, 2), 1.0, dtype=torch.float16) location_coords = torch.full((1, 2), 1.0, dtype=torch.float16)
buffer_tiff = io.BytesIO() return [
torch.save(pixel_values, buffer_tiff) {
buffer_tiff.seek(0) "role": "user",
binary_data = buffer_tiff.read() "content": [
base64_tensor_embedding = base64.b64encode(binary_data).decode("utf-8") {
"type": "image_embeds",
buffer_coord = io.BytesIO() "image_embeds": {
torch.save(location_coords, buffer_coord) "pixel_values": tensor2base64(pixel_values),
buffer_coord.seek(0) "location_coords": tensor2base64(location_coords),
binary_data = buffer_coord.read() },
base64_coord_embedding = base64.b64encode(binary_data).decode("utf-8") }
],
return { }
"model": model_name, ]
"additional_data": {"prompt_token_ids": [1]},
"encoding_format": "base64",
"messages": [
{
"role": "user",
"content": [
{
"type": "image_embeds",
"image_embeds": {
"pixel_values": base64_tensor_embedding,
"location_coords": base64_coord_embedding,
},
}
],
}
],
}
@pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize(
async def test_single_request(model_name: str): "model_name", ["ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"]
)
def test_single_request(model_name: str):
args = [ args = [
"--runner", "--runner",
"pooling", "pooling",
# use half precision for speed and memory savings in CI environment # use half precision for speed and memory savings in CI environment
"--dtype", "--dtype",
DTYPE, "float16",
"--enforce-eager", "--enforce-eager",
"--trust-remote-code", "--trust-remote-code",
"--max-num-seqs", "--max-num-seqs",
...@@ -70,11 +53,15 @@ async def test_single_request(model_name: str): ...@@ -70,11 +53,15 @@ async def test_single_request(model_name: str):
"--enable-mm-embeds", "--enable-mm-embeds",
] ]
with RemoteOpenAIServer(MODEL_NAME, args) as server: with RemoteOpenAIServer(model_name, args) as server:
prompt = _terratorch_dummy_inputs(model_name) response = requests.post(
server.url_for("pooling"),
# test single pooling json={
response = requests.post(server.url_for("pooling"), json=prompt) "model": model_name,
"messages": _terratorch_dummy_messages(),
"encoding_format": "base64",
},
)
response.raise_for_status() response.raise_for_status()
output = response.json()["data"][0]["data"] output = response.json()["data"][0]["data"]
......
This diff is collapsed.
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import base64
import io
import json import json
import openai # use the official client for correctness check import openai # use the official client for correctness check
...@@ -13,6 +11,7 @@ from transformers import AutoConfig ...@@ -13,6 +11,7 @@ from transformers import AutoConfig
from tests.conftest import ImageTestAssets from tests.conftest import ImageTestAssets
from tests.utils import RemoteOpenAIServer from tests.utils import RemoteOpenAIServer
from vllm.utils.serial_utils import tensor2base64
# any model with a chat template should work here # any model with a chat template should work here
MODEL_NAME = "llava-hf/llava-1.5-7b-hf" MODEL_NAME = "llava-hf/llava-1.5-7b-hf"
...@@ -50,18 +49,6 @@ async def client_with_image_embeds(server_with_image_embeds): ...@@ -50,18 +49,6 @@ async def client_with_image_embeds(server_with_image_embeds):
yield async_client yield async_client
def encode_image_embedding_to_base64(image_embedding) -> str:
"""
Encode image embedding to base64 string
"""
buffer = io.BytesIO()
torch.save(image_embedding, buffer)
buffer.seek(0)
binary_data = buffer.read()
base64_image_embedding = base64.b64encode(binary_data).decode("utf-8")
return base64_image_embedding
@pytest.mark.asyncio @pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("dtype", [torch.half, torch.float16, torch.float32]) @pytest.mark.parametrize("dtype", [torch.half, torch.float16, torch.float32])
...@@ -73,7 +60,7 @@ async def test_completions_with_image_embeds( ...@@ -73,7 +60,7 @@ async def test_completions_with_image_embeds(
): ):
# Test case: Single image embeds input # Test case: Single image embeds input
image_embeds = image_assets[0].image_embeds.to(dtype=dtype) image_embeds = image_assets[0].image_embeds.to(dtype=dtype)
base64_image_embedding = encode_image_embedding_to_base64(image_embeds) base64_image_embedding = tensor2base64(image_embeds)
chat_completion = await client_with_image_embeds.chat.completions.create( chat_completion = await client_with_image_embeds.chat.completions.create(
messages=[ messages=[
{"role": "system", "content": "You are a helpful assistant."}, {"role": "system", "content": "You are a helpful assistant."},
......
...@@ -536,7 +536,7 @@ def resolve_hf_chat_template( ...@@ -536,7 +536,7 @@ def resolve_hf_chat_template(
def _resolve_chat_template_content_format( def _resolve_chat_template_content_format(
chat_template: str | None, chat_template: str | None,
tools: list[dict[str, Any]] | None, tools: list[dict[str, Any]] | None,
tokenizer: TokenizerLike, tokenizer: TokenizerLike | None,
*, *,
model_config: ModelConfig, model_config: ModelConfig,
) -> _ChatTemplateContentFormat: ) -> _ChatTemplateContentFormat:
...@@ -593,7 +593,7 @@ def resolve_chat_template_content_format( ...@@ -593,7 +593,7 @@ def resolve_chat_template_content_format(
chat_template: str | None, chat_template: str | None,
tools: list[dict[str, Any]] | None, tools: list[dict[str, Any]] | None,
given_format: ChatTemplateContentFormatOption, given_format: ChatTemplateContentFormatOption,
tokenizer: TokenizerLike, tokenizer: TokenizerLike | None,
*, *,
model_config: ModelConfig, model_config: ModelConfig,
) -> _ChatTemplateContentFormat: ) -> _ChatTemplateContentFormat:
...@@ -627,11 +627,10 @@ class BaseMultiModalItemTracker(ABC, Generic[_T]): ...@@ -627,11 +627,10 @@ class BaseMultiModalItemTracker(ABC, Generic[_T]):
maximum per prompt. maximum per prompt.
""" """
def __init__(self, model_config: ModelConfig, tokenizer: TokenizerLike): def __init__(self, model_config: ModelConfig):
super().__init__() super().__init__()
self._model_config = model_config self._model_config = model_config
self._tokenizer = tokenizer
self._items_by_modality = defaultdict[str, list[_T | None]](list) self._items_by_modality = defaultdict[str, list[_T | None]](list)
self._uuids_by_modality = defaultdict[str, list[str | None]](list) self._uuids_by_modality = defaultdict[str, list[str | None]](list)
...@@ -1612,7 +1611,6 @@ def _postprocess_messages(messages: list[ConversationMessage]) -> None: ...@@ -1612,7 +1611,6 @@ def _postprocess_messages(messages: list[ConversationMessage]) -> None:
def parse_chat_messages( def parse_chat_messages(
messages: list[ChatCompletionMessageParam], messages: list[ChatCompletionMessageParam],
model_config: ModelConfig, model_config: ModelConfig,
tokenizer: TokenizerLike,
content_format: _ChatTemplateContentFormat, content_format: _ChatTemplateContentFormat,
) -> tuple[ ) -> tuple[
list[ConversationMessage], list[ConversationMessage],
...@@ -1620,7 +1618,7 @@ def parse_chat_messages( ...@@ -1620,7 +1618,7 @@ def parse_chat_messages(
MultiModalUUIDDict | None, MultiModalUUIDDict | None,
]: ]:
conversation: list[ConversationMessage] = [] conversation: list[ConversationMessage] = []
mm_tracker = MultiModalItemTracker(model_config, tokenizer) mm_tracker = MultiModalItemTracker(model_config)
for msg in messages: for msg in messages:
sub_messages = _parse_chat_message_content( sub_messages = _parse_chat_message_content(
...@@ -1644,7 +1642,6 @@ def parse_chat_messages( ...@@ -1644,7 +1642,6 @@ def parse_chat_messages(
def parse_chat_messages_futures( def parse_chat_messages_futures(
messages: list[ChatCompletionMessageParam], messages: list[ChatCompletionMessageParam],
model_config: ModelConfig, model_config: ModelConfig,
tokenizer: TokenizerLike,
content_format: _ChatTemplateContentFormat, content_format: _ChatTemplateContentFormat,
) -> tuple[ ) -> tuple[
list[ConversationMessage], list[ConversationMessage],
...@@ -1652,7 +1649,7 @@ def parse_chat_messages_futures( ...@@ -1652,7 +1649,7 @@ def parse_chat_messages_futures(
MultiModalUUIDDict | None, MultiModalUUIDDict | None,
]: ]:
conversation: list[ConversationMessage] = [] conversation: list[ConversationMessage] = []
mm_tracker = AsyncMultiModalItemTracker(model_config, tokenizer) mm_tracker = AsyncMultiModalItemTracker(model_config)
for msg in messages: for msg in messages:
sub_messages = _parse_chat_message_content( sub_messages = _parse_chat_message_content(
......
...@@ -834,7 +834,6 @@ class LLM: ...@@ -834,7 +834,6 @@ class LLM:
conversation, mm_data, mm_uuids = parse_chat_messages( conversation, mm_data, mm_uuids = parse_chat_messages(
msgs, msgs,
model_config, model_config,
tokenizer,
content_format=resolved_content_format, content_format=resolved_content_format,
) )
......
...@@ -1088,11 +1088,6 @@ class OpenAIServing: ...@@ -1088,11 +1088,6 @@ class OpenAIServing:
Sequence[RequestPrompt], Sequence[RequestPrompt],
list[EngineTokensPrompt], list[EngineTokensPrompt],
]: ]:
if tokenizer is None:
raise ValueError(
"Unable to get tokenizer because `skip_tokenizer_init=True`"
)
model_config = self.model_config model_config = self.model_config
resolved_content_format = resolve_chat_template_content_format( resolved_content_format = resolve_chat_template_content_format(
...@@ -1105,7 +1100,6 @@ class OpenAIServing: ...@@ -1105,7 +1100,6 @@ class OpenAIServing:
conversation, mm_data_future, mm_uuids = parse_chat_messages_futures( conversation, mm_data_future, mm_uuids = parse_chat_messages_futures(
messages, messages,
model_config, model_config,
tokenizer,
content_format=resolved_content_format, content_format=resolved_content_format,
) )
......
...@@ -89,12 +89,10 @@ def parse_score_data( ...@@ -89,12 +89,10 @@ def parse_score_data(
data_1: str | ScoreContentPartParam, data_1: str | ScoreContentPartParam,
data_2: str | ScoreContentPartParam, data_2: str | ScoreContentPartParam,
model_config: ModelConfig, model_config: ModelConfig,
tokenizer: TokenizerLike,
) -> tuple[str, str, MultiModalDataDict | None]: ) -> tuple[str, str, MultiModalDataDict | None]:
mm_tracker = MultiModalItemTracker(model_config, tokenizer) mm_tracker = MultiModalItemTracker(model_config)
content_1 = _parse_score_content(data_1, mm_tracker) content_1 = _parse_score_content(data_1, mm_tracker)
content_2 = _parse_score_content(data_2, mm_tracker) content_2 = _parse_score_content(data_2, mm_tracker)
def ensure_str(content: _ContentPart | None) -> str: def ensure_str(content: _ContentPart | None) -> str:
...@@ -188,7 +186,6 @@ def get_score_prompt( ...@@ -188,7 +186,6 @@ def get_score_prompt(
data_1, data_1,
data_2, data_2,
model_config, model_config,
tokenizer,
) )
from vllm.model_executor.model_loader import get_model_cls from vllm.model_executor.model_loader import get_model_cls
......
...@@ -62,6 +62,7 @@ from vllm.multimodal.inputs import ( ...@@ -62,6 +62,7 @@ from vllm.multimodal.inputs import (
from vllm.multimodal.parse import ( from vllm.multimodal.parse import (
DictEmbeddingItems, DictEmbeddingItems,
ImageSize, ImageSize,
ModalityDataItems,
MultiModalDataItems, MultiModalDataItems,
MultiModalDataParser, MultiModalDataParser,
) )
...@@ -570,7 +571,7 @@ class HunYuanVLMultiModalDataParser(MultiModalDataParser): ...@@ -570,7 +571,7 @@ class HunYuanVLMultiModalDataParser(MultiModalDataParser):
def _parse_image_data( def _parse_image_data(
self, self,
data: dict[str, torch.Tensor] | ModalityData[ImageItem], data: dict[str, torch.Tensor] | ModalityData[ImageItem],
): ) -> ModalityDataItems[Any, Any] | None:
if isinstance(data, dict): if isinstance(data, dict):
return DictEmbeddingItems( return DictEmbeddingItems(
data, data,
......
...@@ -1000,7 +1000,7 @@ class KeyeMultiModalDataParser(MultiModalDataParser): ...@@ -1000,7 +1000,7 @@ class KeyeMultiModalDataParser(MultiModalDataParser):
def _parse_image_data( def _parse_image_data(
self, self,
data: dict[str, torch.Tensor] | ModalityData[ImageItem], data: dict[str, torch.Tensor] | ModalityData[ImageItem],
) -> ModalityDataItems[Any, Any]: ) -> ModalityDataItems[Any, Any] | None:
if isinstance(data, dict): if isinstance(data, dict):
return DictEmbeddingItems( return DictEmbeddingItems(
data, data,
...@@ -1017,7 +1017,7 @@ class KeyeMultiModalDataParser(MultiModalDataParser): ...@@ -1017,7 +1017,7 @@ class KeyeMultiModalDataParser(MultiModalDataParser):
def _parse_video_data( def _parse_video_data(
self, self,
data: dict[str, torch.Tensor] | ModalityData[VideoItem], data: dict[str, torch.Tensor] | ModalityData[VideoItem],
) -> ModalityDataItems[Any, Any]: ) -> ModalityDataItems[Any, Any] | None:
if isinstance(data, dict): if isinstance(data, dict):
return DictEmbeddingItems( return DictEmbeddingItems(
data, data,
......
...@@ -333,7 +333,7 @@ class KeyeVL1_5MultiModalDataParser(MultiModalDataParser): ...@@ -333,7 +333,7 @@ class KeyeVL1_5MultiModalDataParser(MultiModalDataParser):
def _parse_image_data( def _parse_image_data(
self, self,
data: dict[str, torch.Tensor] | ModalityData[ImageItem], data: dict[str, torch.Tensor] | ModalityData[ImageItem],
) -> ModalityDataItems[Any, Any]: ) -> ModalityDataItems[Any, Any] | None:
if isinstance(data, dict): if isinstance(data, dict):
return DictEmbeddingItems( return DictEmbeddingItems(
data, data,
...@@ -350,7 +350,7 @@ class KeyeVL1_5MultiModalDataParser(MultiModalDataParser): ...@@ -350,7 +350,7 @@ class KeyeVL1_5MultiModalDataParser(MultiModalDataParser):
def _parse_video_data( def _parse_video_data(
self, self,
data: dict[str, torch.Tensor] | ModalityData[VideoItem], data: dict[str, torch.Tensor] | ModalityData[VideoItem],
) -> ModalityDataItems[Any, Any]: ) -> ModalityDataItems[Any, Any] | None:
if isinstance(data, dict): if isinstance(data, dict):
return DictEmbeddingItems( return DictEmbeddingItems(
data, data,
......
...@@ -11,6 +11,7 @@ import pybase64 ...@@ -11,6 +11,7 @@ import pybase64
import torch import torch
from vllm.utils.import_utils import PlaceholderModule from vllm.utils.import_utils import PlaceholderModule
from vllm.utils.serial_utils import tensor2base64
from .base import MediaIO from .base import MediaIO
...@@ -135,8 +136,4 @@ class AudioEmbeddingMediaIO(MediaIO[torch.Tensor]): ...@@ -135,8 +136,4 @@ class AudioEmbeddingMediaIO(MediaIO[torch.Tensor]):
return torch.load(filepath, weights_only=True) return torch.load(filepath, weights_only=True)
def encode_base64(self, media: torch.Tensor) -> str: def encode_base64(self, media: torch.Tensor) -> str:
buffer = BytesIO() return tensor2base64(media)
torch.save(media, buffer)
buffer.seek(0)
binary_data = buffer.read()
return pybase64.b64encode(binary_data).decode("utf-8")
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import base64 import base64
import io
import sys import sys
from dataclasses import dataclass from dataclasses import dataclass
from typing import Literal from typing import Literal
...@@ -52,6 +53,15 @@ Endianness = Literal["native", "big", "little"] ...@@ -52,6 +53,15 @@ Endianness = Literal["native", "big", "little"]
EncodingFormat = Literal["float", "base64", "bytes"] EncodingFormat = Literal["float", "base64", "bytes"]
def tensor2base64(x: torch.Tensor) -> str:
with io.BytesIO() as buf:
torch.save(x, buf)
buf.seek(0)
binary_data = buf.read()
return base64.b64encode(binary_data).decode("utf-8")
def tensor2binary( def tensor2binary(
tensor: torch.Tensor, embed_dtype: EmbedDType, endianness: Endianness tensor: torch.Tensor, embed_dtype: EmbedDType, endianness: Endianness
) -> bytes: ) -> bytes:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment