Unverified commit 0b9cc56f, authored by Benji Beck and committed by GitHub
Browse files

Migrate MllamaImagePixelInputs to TensorSchema (#22020)


Signed-off-by: Benji Beck <benjibeck@meta.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
parent 8896eb72
......@@ -17,7 +17,7 @@
"""PyTorch Mllama model."""
import math
from collections.abc import Iterable, Mapping, Sequence
from typing import Literal, Optional, TypedDict, Union
from typing import Annotated, Literal, Optional, Union
import numpy as np
import torch
......@@ -64,6 +64,7 @@ from vllm.multimodal.processing import (BaseProcessingInfo,
EncDecMultiModalProcessor,
PromptReplacement, PromptUpdate)
from vllm.multimodal.profiling import BaseDummyInputsBuilder
from vllm.utils.tensor_schema import TensorSchema, TensorShape
from .clip import CLIPMLP
from .interfaces import SupportsMultiModal, SupportsV0Only
......@@ -73,15 +74,30 @@ from .utils import AutoWeightsLoader, WeightsMapper, maybe_prefix
logger = init_logger(__name__)
class MllamaImagePixelInputs(TypedDict):
    """Typed dictionary describing Mllama image inputs given as pixel values.

    The `type` key is a literal tag that is always `"pixel_values"`; the
    remaining keys hold tensors whose expected shapes are noted next to
    each field.
    """

    type: Literal["pixel_values"]

    # Shape:
    # (batch_size, max_num_image, max_num_chunk, num_channel, height, width)
    data: torch.Tensor

    # Shape: `(batch_size, max_num_image)`
    aspect_ratio_ids: torch.Tensor

    # Shape: `(batch_size, max_num_image, max_num_tiles)`
    aspect_ratio_mask: torch.Tensor
class MllamaImagePixelInputs(TensorSchema):
    """
    Schema for Mllama image inputs supplied as pixel values.

    Each tensor field carries a `TensorShape` annotation naming its
    dimensions symbolically.

    Dimensions:
        - batch_size: Batch size
        - max_num_image: Max number of images
        - max_num_chunk: Max number of chunks
        - max_num_tiles: Max number of tiles per image
        - num_channel: Number of channels
        - height: Height
        - width: Width
    """

    # Literal tag for this input kind; defaults to its only allowed value.
    type: Literal["pixel_values"] = "pixel_values"

    # Pixel data, six named dimensions.
    data: Annotated[
        torch.Tensor,
        TensorShape(
            "batch_size",
            "max_num_image",
            "max_num_chunk",
            "num_channel",
            "height",
            "width",
        )
    ]

    aspect_ratio_ids: Annotated[
        torch.Tensor,
        TensorShape("batch_size", "max_num_image")
    ]

    # NOTE(review): the last dimension here is named "max_num_tiles" while
    # `data` uses "max_num_chunk"; they are distinct symbols as written --
    # confirm whether they are meant to denote the same size.
    aspect_ratio_mask: Annotated[
        torch.Tensor,
        TensorShape("batch_size", "max_num_image", "max_num_tiles")
    ]
# TODO: support LlamaImageEmbeddingInputs
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment