Unverified commit a68e293c, authored by Hyogeun Oh (오효근), committed by GitHub
Browse files

[Doc] Convert Sphinx directives ( `{class}`, `{meth}`, `{attr}`, ...) to...


[Doc]  Convert Sphinx directives ( `{class}`, `{meth}`, `{attr}`, ...) to MkDocs format for better documentation linking (#18663)
Signed-off-by: Zerohertz <ohg3417@gmail.com>
parent 68811079
......@@ -965,7 +965,7 @@ def select_tiling(
class MolmoProcessorWrapper:
"""
Wraps {class}`MolmoProcessor` so that it can be called directly.
Wraps `MolmoProcessor` so that it can be called directly.
The original definition can be found here:
https://huggingface.co/allenai/Molmo-7B-D-0924/blob/main/preprocessing_molmo.py
......
......@@ -67,14 +67,14 @@ class PixtralImagePixelInputs(TypedDict):
"""
Shape: `(batch_size * num_images, num_channels, image_width, image_height)`
The result of stacking {attr}`ImageEncoding.tokens` from each prompt.
The result of stacking `ImageEncoding.tokens` from each prompt.
"""
class PixtralProcessorAdapter:
"""
Provide a HF-compatible interface for
{class}`mistral_common.tokens.tokenizers.multimodal.ImageEncoder`.
`mistral_common.tokens.tokenizers.multimodal.ImageEncoder`.
"""
def __init__(self, tokenizer: MistralTokenizer) -> None:
......
......@@ -382,7 +382,8 @@ def _get_tokenizer_without_image_pad(
tokenizer: PreTrainedTokenizer) -> PreTrainedTokenizer:
"""
The logic of adding image pad tokens should only be applied in
{class}`QwenVLProcessor`, so they are patched out here.
[`QwenVLProcessor`][vllm.model_executor.models.qwen_vl.QwenVLProcessor],
so they are patched out here.
The definition of the wrapped tokenizer can be found here:
https://huggingface.co/Qwen/Qwen-VL/blob/main/tokenization_qwen.py
......
......@@ -383,7 +383,7 @@ class _ModelRegistry:
`model_cls` can be either:
- A {class}`torch.nn.Module` class directly referencing the model.
- A [`torch.nn.Module`][] class directly referencing the model.
- A string in the format `<module>:<class>` which can be used to
lazily import the model. This is useful to avoid initializing CUDA
when importing the model and thus the related error
......
......@@ -66,7 +66,7 @@ class WeightsMapper:
class AutoWeightsLoader:
"""
Helper class to load weights into a {class}`torch.nn.Module`. It is able
Helper class to load weights into a [`torch.nn.Module`][]. It is able
to automatically detect child modules and parameters while iterating over
the weights only once.
......
......@@ -8,11 +8,12 @@ from .registry import MultiModalRegistry
MULTIMODAL_REGISTRY = MultiModalRegistry()
"""
The global {class}`~MultiModalRegistry` is used by model runners to
dispatch data processing according to the target model.
The global [`MultiModalRegistry`][vllm.multimodal.registry.MultiModalRegistry]
is used by model runners to dispatch data processing according to the target
model.
Info:
[mm-processing][]
[mm_processing](../../../design/mm_processing.html)
"""
__all__ = [
......
......@@ -29,14 +29,14 @@ _T = TypeVar("_T")
HfImageItem: TypeAlias = Union["Image", np.ndarray, "torch.Tensor"]
"""
A {class}`transformers.image_utils.ImageInput` representing a single image
A `transformers.image_utils.ImageInput` representing a single image
item, which can be passed to a HuggingFace `ImageProcessor`.
"""
HfVideoItem: TypeAlias = Union[list["Image"], np.ndarray, "torch.Tensor",
list[np.ndarray], list["torch.Tensor"]]
"""
A {class}`transformers.image_utils.VideoInput` representing a single video
A `transformers.image_utils.VideoInput` representing a single video
item, which can be passed to a HuggingFace `VideoProcessor`.
"""
......@@ -48,7 +48,7 @@ item, which can be passed to a HuggingFace `AudioProcessor`.
ImageItem: TypeAlias = Union[HfImageItem, "torch.Tensor"]
"""
A {class}`transformers.image_utils.ImageInput` representing a single image
A `transformers.image_utils.ImageInput` representing a single image
item, which can be passed to a HuggingFace `ImageProcessor`.
Alternatively, a 3-D tensor or batch of 2-D tensors,
......@@ -58,7 +58,7 @@ these are directly passed to the model without HF processing.
VideoItem: TypeAlias = Union[HfVideoItem, "torch.Tensor"]
"""
A {class}`transformers.image_utils.VideoInput` representing a single video
A `transformers.image_utils.VideoInput` representing a single video
item, which can be passed to a HuggingFace `VideoProcessor`.
Alternatively, a 3-D tensor or batch of 2-D tensors,
......@@ -108,7 +108,8 @@ MultiModalDataDict: TypeAlias = Mapping[str, ModalityData[Any]]
"""
A dictionary containing an entry for each modality type to input.
The built-in modalities are defined by {class}`MultiModalDataBuiltins`.
The built-in modalities are defined by
[`MultiModalDataBuiltins`][vllm.multimodal.inputs.MultiModalDataBuiltins].
"""
......@@ -169,7 +170,8 @@ Uses a list instead of a tensor if the dimensions of each element do not match.
def nested_tensors_equal(a: NestedTensors, b: NestedTensors) -> bool:
"""Equality check between {data}`NestedTensors` objects."""
"""Equality check between
[`NestedTensors`][vllm.multimodal.inputs.NestedTensors] objects."""
if isinstance(a, torch.Tensor):
return isinstance(b, torch.Tensor) and torch.equal(a, b)
elif isinstance(b, torch.Tensor):
......@@ -189,7 +191,7 @@ def nested_tensors_equal(a: NestedTensors, b: NestedTensors) -> bool:
BatchedTensorInputs: TypeAlias = Mapping[str, NestedTensors]
"""
A dictionary containing nested tensors which have been batched via
{meth}`MultiModalKwargs.batch`.
[`MultiModalKwargs.batch`][vllm.multimodal.inputs.MultiModalKwargs.batch].
"""
......@@ -197,7 +199,7 @@ A dictionary containing nested tensors which have been batched via
class MultiModalFieldElem:
"""
Represents a keyword argument corresponding to a multi-modal item
in {class}`MultiModalKwargs`.
in [`MultiModalKwargs`][vllm.multimodal.inputs.MultiModalKwargs].
"""
modality: str
......@@ -208,13 +210,15 @@ class MultiModalFieldElem:
key: str
"""
The key of this field in {class}`MultiModalKwargs`,
The key of this field in
[`MultiModalKwargs`][vllm.multimodal.inputs.MultiModalKwargs],
i.e. the name of the keyword argument to be passed to the model.
"""
data: NestedTensors
"""
The tensor data of this field in {class}`MultiModalKwargs`,
The tensor data of this field in
[`MultiModalKwargs`][vllm.multimodal.inputs.MultiModalKwargs],
i.e. the value of the keyword argument to be passed to the model.
"""
......@@ -237,7 +241,8 @@ class MultiModalFieldElem:
class BaseMultiModalField(ABC):
"""
Defines how to interpret tensor data belonging to a keyword argument in
{class}`MultiModalKwargs` for multiple multi-modal items, and vice versa.
[`MultiModalKwargs`][vllm.multimodal.inputs.MultiModalKwargs] for multiple
multi-modal items, and vice versa.
"""
def _field_factory(self, *, modality: str, key: str):
......@@ -262,10 +267,12 @@ class BaseMultiModalField(ABC):
data: NestedTensors,
) -> Sequence[MultiModalFieldElem]:
"""
Construct {class}`MultiModalFieldElem` instances to represent
the provided data.
Construct
[`MultiModalFieldElem`][vllm.multimodal.inputs.MultiModalFieldElem]
instances to represent the provided data.
This is the inverse of {meth}`reduce_data`.
This is the inverse of
[`reduce_data`][vllm.multimodal.inputs.BaseMultiModalField.reduce_data].
"""
raise NotImplementedError
......@@ -275,9 +282,11 @@ class BaseMultiModalField(ABC):
def reduce_data(self, elems: list[MultiModalFieldElem]) -> NestedTensors:
"""
Merge the data from multiple instances of {class}`MultiModalFieldElem`.
Merge the data from multiple instances of
[`MultiModalFieldElem`][vllm.multimodal.inputs.MultiModalFieldElem].
This is the inverse of {meth}`build_elems`.
This is the inverse of
[`build_elems`][vllm.multimodal.inputs.BaseMultiModalField.build_elems].
"""
field_types = [type(item.field) for item in elems]
if len(set(field_types)) > 1:
......@@ -290,7 +299,7 @@ class BaseMultiModalField(ABC):
class MultiModalBatchedField(BaseMultiModalField):
"""
Info:
[MultiModalFieldConfig.batched][]
[`MultiModalFieldConfig.batched`][vllm.multimodal.inputs.MultiModalFieldConfig.batched]
"""
def build_elems(
......@@ -320,8 +329,8 @@ class MultiModalBatchedField(BaseMultiModalField):
class MultiModalFlatField(BaseMultiModalField):
"""
Info:
[MultiModalFieldConfig.flat][]
[MultiModalFieldConfig.flat_from_sizes][]
[`MultiModalFieldConfig.flat`][vllm.multimodal.inputs.MultiModalFieldConfig.flat]
[`MultiModalFieldConfig.flat_from_sizes`][vllm.multimodal.inputs.MultiModalFieldConfig.flat_from_sizes]
"""
slices: Union[Sequence[slice], Sequence[Sequence[slice]]]
dim: int = 0
......@@ -362,7 +371,7 @@ class MultiModalFlatField(BaseMultiModalField):
class MultiModalSharedField(BaseMultiModalField):
"""
Info:
[MultiModalFieldConfig.shared][]
[`MultiModalFieldConfig.shared`][vllm.multimodal.inputs.MultiModalFieldConfig.shared]
"""
batch_size: int
......@@ -508,7 +517,7 @@ class MultiModalFieldConfig:
```
Info:
[MultiModalFieldConfig.flat][]
[`MultiModalFieldConfig.flat`][vllm.multimodal.inputs.MultiModalFieldConfig.flat]
"""
if size_per_item.ndim != 1:
......@@ -572,8 +581,10 @@ class MultiModalFieldConfig:
class MultiModalKwargsItem(UserDict[str, MultiModalFieldElem]):
"""
A collection of {class}`MultiModalFieldElem`
corresponding to a data item in {class}`MultiModalDataItems`.
A collection of
[`MultiModalFieldElem`][vllm.multimodal.inputs.MultiModalFieldElem]
corresponding to a data item in
[`MultiModalDataItems`][vllm.multimodal.parse.MultiModalDataItems].
"""
@staticmethod
......@@ -592,11 +603,13 @@ class MultiModalKwargsItem(UserDict[str, MultiModalFieldElem]):
class MultiModalKwargs(UserDict[str, NestedTensors]):
"""
A dictionary that represents the keyword arguments to
{meth}`~torch.nn.Module.forward`.
[`torch.nn.Module.forward`][].
The metadata `items` enables us to obtain the keyword arguments
corresponding to each data item in {class}`MultiModalDataItems`, via
{meth}`get_item` and {meth}`get_items`.
corresponding to each data item in
[`MultiModalDataItems`][vllm.multimodal.parse.MultiModalDataItems], via
[`get_item`][vllm.multimodal.inputs.MultiModalKwargs.get_item] and
[`get_items`][vllm.multimodal.inputs.MultiModalKwargs.get_items].
"""
@staticmethod
......@@ -635,7 +648,9 @@ class MultiModalKwargs(UserDict[str, NestedTensors]):
@staticmethod
def from_items(items: Sequence[MultiModalKwargsItem]):
"""Construct a new {class}`MultiModalKwargs` from multiple items."""
"""Construct a new
[`MultiModalKwargs`][vllm.multimodal.inputs.MultiModalKwargs]
from multiple items."""
elems_by_key = defaultdict[str, list[MultiModalFieldElem]](list)
for item in items:
for key, elem in item.items():
......@@ -800,7 +815,7 @@ A dictionary containing placeholder ranges for each modality.
class MultiModalInputs(TypedDict):
"""
Represents the outputs of
{class}`vllm.multimodal.processing.BaseMultiModalProcessor`,
[`BaseMultiModalProcessor`][vllm.multimodal.processing.BaseMultiModalProcessor],
ready to be passed to vLLM internals.
"""
......@@ -836,7 +851,8 @@ class MultiModalInputs(TypedDict):
class MultiModalEncDecInputs(MultiModalInputs):
"""
Represents the outputs of {class}`vllm.multimodal.EncDecMultiModalProcessor`
Represents the outputs of
[`EncDecMultiModalProcessor`][vllm.multimodal.processing.EncDecMultiModalProcessor]
ready to be passed to vLLM internals.
"""
......
......@@ -28,7 +28,8 @@ else:
class ModalityDataItems(ABC, Generic[_T, _I]):
"""
Represents data items for a modality in {class}`MultiModalDataItems`.
Represents data items for a modality in
[`MultiModalDataItems`][vllm.multimodal.parse.MultiModalDataItems].
"""
def __init__(self, data: _T, modality: str) -> None:
......@@ -251,15 +252,15 @@ _D = TypeVar("_D", bound=ModalityDataItems[Any, Any])
class MultiModalDataItems(UserDict[str, ModalityDataItems[Any, Any]]):
"""
As {data}`~vllm.multimodal.inputs.MultiModalDataDict`, but normalized
such that each entry corresponds to a list.
As [`MultiModalDataDict`][vllm.multimodal.inputs.MultiModalDataDict], but
normalized such that each entry corresponds to a list.
"""
def get_count(self, modality: str, *, strict: bool = True) -> int:
"""
Get the number of data items belonging to a modality.
If `strict=False`, return `0` instead of raising {exc}`KeyError`
If `strict=False`, return `0` instead of raising [`KeyError`][]
even if the modality is not found.
"""
if modality not in self:
......@@ -305,8 +306,8 @@ ModalityDataParser: TypeAlias = Callable[[ModalityData[Any]],
class MultiModalDataParser:
"""
Parses {data}`~vllm.multimodal.inputs.MultiModalDataDict` into
{class}`MultiModalDataItems`.
Parses [`MultiModalDataDict`][vllm.multimodal.inputs.MultiModalDataDict]
into [`MultiModalDataItems`][vllm.multimodal.parse.MultiModalDataItems].
Args:
target_sr (float, optional): Enables automatic resampling of audio
......
......@@ -114,13 +114,14 @@ class PromptUpdateDetails(Generic[_S]):
is_embed: Optional[Callable[["_BoundPromptSequence"], torch.Tensor]] = None
"""
Given {attr}`full`, return a boolean mask of shape `(len(full),)`
indicating which positions of `full` to assign embeddings to.
Given [`full`][vllm.multimodal.processing.PromptUpdateDetails.full],
return a boolean mask of shape `(len(full),)` indicating which positions
of `full` to assign embeddings to.
`None` (default) means to assign embeddings to all positions of `full`.
The embeddings are obtained by calling
{class}`SupportsMultiModal.get_multimodal_embeddings`.
[`SupportsMultiModal.get_multimodal_embeddings`][vllm.model_executor.models.interfaces.SupportsMultiModal.get_multimodal_embeddings].
"""
@staticmethod
......@@ -159,13 +160,15 @@ PromptUpdateInfo = Union[PromptSeq, PromptUpdateDetails]
The token sequence or text that are part of the update.
If only part of the content corresponds to feature placeholders, you can
use {class}`PromptUpdateDetails` to specify which part.
use [`PromptUpdateDetails`][vllm.multimodal.processing.PromptUpdateDetails] to
specify which part.
"""
PromptUpdateContent = Union[Callable[[int], PromptUpdateInfo],
PromptUpdateInfo]
"""
Given the index of the processed item within {attr}`modality`,
Given the index of the processed item within
[`modality`][vllm.multimodal.processing.PromptUpdate.modality],
output the corresponding token sequence (or text).
For convenience, you can directly pass in the token sequence (or text)
......@@ -260,8 +263,10 @@ class PromptInsertion(PromptUpdate):
insertion: PromptUpdateContent = field(repr=False)
"""
Given the index of the processed item within {attr}`modality`,
output the token sequence (or text) to insert right after {attr}`target`.
Given the index of the processed item within
[`modality`][vllm.multimodal.processing.PromptUpdate.modality],
output the token sequence (or text) to insert right after
[`target`][vllm.multimodal.processing.PromptUpdate.target].
For convenience, you can directly pass in the token sequence (or text)
instead of a function if it does not depend on the input.
......@@ -332,8 +337,10 @@ class PromptReplacement(PromptUpdate):
replacement: PromptUpdateContent = field(repr=False)
"""
Given the index of the processed item within {attr}`modality`,
output the token sequence (or text) to replace {attr}`target`.
Given the index of the processed item within
[`modality`][vllm.multimodal.processing.PromptUpdate.modality],
output the token sequence (or text) to replace
[`target`][vllm.multimodal.processing.PromptUpdate.target].
For convenience, you can directly pass in the token sequence (or text)
instead of a function if it does not depend on the input.
......@@ -387,14 +394,16 @@ _M = TypeVar("_M", bound=Union[_HasModalityAttr, _HasModalityProp])
def full_groupby_modality(values: Iterable[_M]) -> ItemsView[str, list[_M]]:
"""Convenience function to apply [full_groupby][] based on modality."""
"""Convenience function to apply [`full_groupby`][vllm.utils.full_groupby]
based on modality."""
return full_groupby(values, key=lambda x: x.modality)
@dataclass
class _BoundPromptSequence:
"""
A {data}`_PromptSeq` bound to a tokenizer to automatically
A [`PromptSeq`][vllm.multimodal.processing.PromptSeq] bound
to a tokenizer to automatically
convert between token sequence and text representations.
"""
tokenizer: AnyTokenizer = field(repr=False)
......@@ -446,9 +455,11 @@ class _BoundPromptContent:
@dataclass
class BoundPromptUpdate:
"""
A {class}`PromptUpdate` bound to a tokenizer to automatically convert
{attr}`target` and the result of {meth}`get_content` between
token sequence and text representations.
A [`PromptUpdate`][vllm.multimodal.processing.PromptUpdate] bound
to a tokenizer to automatically convert
[`target`][vllm.multimodal.processing.PromptUpdate.target] and the result of
[`get_content`][vllm.multimodal.processing.BoundPromptUpdate.get_content]
between token sequence and text representations.
"""
_origin: PromptUpdate
tokenizer: AnyTokenizer = field(repr=False)
......@@ -482,7 +493,8 @@ class BoundPromptUpdate:
def get_content(self, item_idx: int) -> _BoundPromptContent:
"""
Given the index of the processed item within {attr}`modality`,
Given the index of the processed item within
[`modality`][vllm.multimodal.processing.PromptUpdate.modality],
output the token sequence (or text) to update.
"""
content = self.content
......@@ -1019,7 +1031,8 @@ class ProcessingCache:
) -> None:
"""
Put a processed multi-modal item into the cache
according to its dependencies (see {meth}`get`).
according to its dependencies
(see [`get`][vllm.multimodal.processing.ProcessingCache.get]).
"""
cache_key = MultiModalHasher.hash_kwargs(model_id=model_id,
**{modality: input_item},
......@@ -1091,7 +1104,8 @@ _I = TypeVar("_I", bound=BaseProcessingInfo)
MultiModalHashes = dict[str, list[str]]
"""
A collection of hashes with a similar structure as {class}`MultiModalKwargs`.
A collection of hashes with a similar structure as
[`MultiModalKwargs`][vllm.multimodal.inputs.MultiModalKwargs].
"""
......@@ -1099,7 +1113,7 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
"""
Abstract base class to process multi-modal inputs to be used in vLLM.
Not to be confused with {class}`transformers.ProcessorMixin`.
Not to be confused with `transformers.ProcessorMixin`.
"""
def __init__(self,
......@@ -1126,10 +1140,12 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
def _get_data_parser(self) -> MultiModalDataParser:
"""
Construct a parser to preprocess multi-modal data items
before passing them to {meth}`_get_hf_mm_data`.
before passing them to
[`_get_hf_mm_data`][vllm.multimodal.processing.BaseMultiModalProcessor._get_hf_mm_data].
You can support additional modalities by creating a subclass
of {class}`MultiModalDataParser` that has additional subparsers.
of [`MultiModalDataParser`][vllm.multimodal.parse.MultiModalDataParser]
that has additional subparsers.
"""
return MultiModalDataParser()
......@@ -1138,8 +1154,11 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
mm_data: MultiModalDataDict,
) -> MultiModalDataItems:
"""
Normalize {class}`MultiModalDataDict` to {class}`MultiModalDataItems`
before passing them to {meth}`_get_hf_mm_data`.
Normalize
[`MultiModalDataDict`][vllm.multimodal.inputs.MultiModalDataDict]
to [`MultiModalDataItems`][vllm.multimodal.parse.MultiModalDataItems]
before passing them to
[`_get_hf_mm_data`][vllm.multimodal.processing.BaseMultiModalProcessor._get_hf_mm_data].
"""
mm_items = self.data_parser.parse_mm_data(mm_data)
supported_mm_limits = self.info.get_supported_mm_limits()
......@@ -1191,7 +1210,8 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
inputs.
Moreover, this information is critical to determine the token positions
in order to construct {class}`~vllm-multimodal.input.PlaceholderRange`
in order to construct
[`PlaceholderRange`][vllm.multimodal.inputs.PlaceholderRange]
for each multi-modal item.
"""
raise NotImplementedError
......@@ -1315,7 +1335,9 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
Most HF processors accept prompt text but not prompt tokens.
If the HF processor adds or removes tokens that are not related to
multi-modal data, you should override this method so it is consistent
with the output of {meth}`_apply_hf_processor_text_only` on the
with the output of
[`_apply_hf_processor_text_only`][vllm.multimodal.processing.BaseMultiModalProcessor._apply_hf_processor_text_only]
on the corresponding text.
"""
return prompt_tokens
......@@ -1330,7 +1352,8 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
Since HF processor requires that text and multi-modal items
correspond to each other, we generate dummy text using
{class}`DummyInputsBuilder` to go along with the multi-modal data.
[`BaseDummyInputsBuilder`][vllm.multimodal.profiling.BaseDummyInputsBuilder]
to go along with the multi-modal data.
"""
mm_counts = mm_items.get_all_counts()
......
......@@ -25,7 +25,7 @@ logger = init_logger(__name__)
class ProcessorInputs:
"""
Represents the keyword arguments to
{meth}`vllm.multimodal.processing.BaseMultiModalProcessor.apply`.
[`vllm.multimodal.processing.BaseMultiModalProcessor.apply`][].
"""
prompt: Union[str, list[int]]
mm_data: MultiModalDataDict
......
......@@ -29,7 +29,11 @@ _I_co = TypeVar("_I_co", bound=BaseProcessingInfo, covariant=True)
class ProcessingInfoFactory(Protocol[_I_co]):
"""Constructs a {class}`MultiModalProcessor` instance from the context."""
"""
Constructs a
[`BaseMultiModalProcessor`][vllm.multimodal.processing.BaseMultiModalProcessor]
instance from the context.
"""
def __call__(
self,
......@@ -40,7 +44,9 @@ class ProcessingInfoFactory(Protocol[_I_co]):
class DummyInputsBuilderFactory(Protocol[_I]):
"""
Constructs a {class}`BaseDummyInputsBuilder` instance from the context.
Constructs a
[`BaseDummyInputsBuilder`][vllm.multimodal.profiling.BaseDummyInputsBuilder]
instance from the context.
"""
def __call__(self, info: _I) -> BaseDummyInputsBuilder[_I]:
......@@ -48,7 +54,11 @@ class DummyInputsBuilderFactory(Protocol[_I]):
class MultiModalProcessorFactory(Protocol[_I]):
"""Constructs a {class}`MultiModalProcessor` instance from the context."""
"""
Constructs a
[`BaseMultiModalProcessor`][vllm.multimodal.processing.BaseMultiModalProcessor]
instance from the context.
"""
def __call__(
self,
......@@ -155,8 +165,6 @@ class MultiModalRegistry:
"""
Get the maximum number of tokens from each modality
for profiling the memory usage of a model.
See {meth}`MultiModalPlugin.get_max_multimodal_tokens` for more details.
"""
mm_limits = self.get_mm_limits_per_prompt(model_config)
......@@ -170,8 +178,6 @@ class MultiModalRegistry:
"""
Get the maximum number of multi-modal tokens
for profiling the memory usage of a model.
See {meth}`MultiModalPlugin.get_max_multimodal_tokens` for more details.
"""
return sum(self.get_max_tokens_by_modality(model_config).values())
......@@ -213,9 +219,6 @@ class MultiModalRegistry:
When the model receives multi-modal data, the provided function is
invoked to transform the data into a dictionary of model inputs.
Info:
[mm-processing][]
"""
def wrapper(model_cls: N) -> N:
......@@ -258,9 +261,6 @@ class MultiModalRegistry:
) -> BaseMultiModalProcessor[BaseProcessingInfo]:
"""
Create a multi-modal processor for a specific model and tokenizer.
Info:
[mm-processing][]
"""
if not model_config.is_multimodal_model:
raise ValueError(f"{model_config.model} is not a multimodal model")
......
......@@ -259,7 +259,8 @@ class MediaConnector:
global_media_connector = MediaConnector()
"""The global {class}`MediaConnector` instance used by vLLM."""
"""The global [`MediaConnector`][vllm.multimodal.utils.MediaConnector]
instance used by vLLM."""
fetch_audio = global_media_connector.fetch_audio
fetch_image = global_media_connector.fetch_image
......
......@@ -84,7 +84,7 @@ class DeviceCapability(NamedTuple):
def to_int(self) -> int:
"""
Express device capability as an integer ``<major><minor>``.
Express device capability as an integer `<major><minor>`.
It is assumed that the minor version is always a single digit.
"""
......@@ -206,10 +206,11 @@ class Platform:
"""
Test whether this platform is compatible with a device capability.
The ``capability`` argument can either be:
The `capability` argument can either be:
- A tuple ``(major, minor)``.
- An integer ``<major><minor>``. (See {meth}`DeviceCapability.to_int`)
- A tuple `(major, minor)`.
- An integer `<major><minor>`. (See
[`DeviceCapability.to_int`][vllm.platforms.interface.DeviceCapability.to_int])
"""
current_capability = cls.get_device_capability(device_id=device_id)
if current_capability is None:
......
......@@ -27,7 +27,7 @@ VLLM_INVALID_TOKEN_ID = -1
def array_full(token_id: int, count: int):
"""{class}`array` equivalent of [numpy.full][]."""
"""[`array`][] equivalent of [numpy.full][]."""
return array(VLLM_TOKEN_ID_ARRAY_TYPE, [token_id]) * count
......@@ -192,8 +192,8 @@ class SequenceData(msgspec.Struct,
def from_prompt_token_counts(
*token_counts: tuple[int, int]) -> "SequenceData":
"""
Construct a {class}`SequenceData` instance by concatenating
prompt token sequences.
Construct a [`SequenceData`][vllm.sequence.SequenceData] instance
by concatenating prompt token sequences.
Each tuple represents one token sequence, expressed in the form
`(token_id, count)`.
......@@ -216,8 +216,8 @@ class SequenceData(msgspec.Struct,
prompt_embeds: Optional[torch.Tensor] = None,
) -> "SequenceData":
"""
Construct a {class}`SequenceData` instance from prompt and output
token sequences.
Construct a [`SequenceData`][vllm.sequence.SequenceData] instance
from prompt and output token sequences.
"""
prompt_token_ids_arr = array(VLLM_TOKEN_ID_ARRAY_TYPE,
prompt_token_ids)
......@@ -452,9 +452,11 @@ class SequenceData(msgspec.Struct,
class Sequence:
"""Stores the data, status, and block information of a sequence.
The sequence is constructed from the {data}`DecoderOnlyInputs`
(for decoder-only) or {data}`EncoderDecoderInputs` (for encoder-decoder)
instance passed in through the `inputs` constructor argument.
The sequence is constructed from the
[`DecoderOnlyInputs`][vllm.inputs.data.DecoderOnlyInputs] (for decoder-only)
or [`EncoderDecoderInputs`][vllm.inputs.data.EncoderDecoderInputs]
(for encoder-decoder) instance passed in through the `inputs`
constructor argument.
Args:
seq_id: The ID of the sequence.
......
......@@ -1005,7 +1005,7 @@ def flatten_2d_lists(lists: Iterable[Iterable[T]]) -> list[T]:
def full_groupby(values: Iterable[_V], *, key: Callable[[_V], _K]):
"""
Unlike {class}`itertools.groupby`, groups are not broken by
Unlike [`itertools.groupby`][], groups are not broken by
non-contiguous data.
"""
groups = defaultdict[_K, list[_V]](list)
......@@ -1926,7 +1926,8 @@ class _PlaceholderBase:
Disallows downstream usage of placeholder modules.
We need to explicitly override each dunder method because
{meth}`__getattr__` is not called when they are accessed.
[`__getattr__`][vllm.utils._PlaceholderBase.__getattr__]
is not called when they are accessed.
Info:
[Special method lookup](https://docs.python.org/3/reference/datamodel.html#special-lookup)
......
......@@ -10,7 +10,7 @@ def sanity_check_mm_encoder_outputs(
) -> None:
"""
Perform sanity checks for the result of
{meth}`vllm.model_executor.models.SupportsMultiModal.get_multimodal_embeddings`.
[`SupportsMultiModal.get_multimodal_embeddings`][vllm.model_executor.models.interfaces.SupportsMultiModal.get_multimodal_embeddings].
"""
assert isinstance(mm_embeddings, (list, tuple, torch.Tensor)), (
"Expected multimodal embeddings to be a list/tuple of 2D tensors, "
......@@ -39,7 +39,7 @@ def scatter_mm_placeholders(
Scatter the multimodal embeddings into a contiguous tensor that represents
the placeholder tokens.
{class}`vllm.multimodal.processing.PromptUpdateDetails.is_embed`.
[`vllm.multimodal.processing.PromptUpdateDetails.is_embed`][].
Args:
embeds: The multimodal embeddings.
......
......@@ -733,12 +733,13 @@ def _pythonize_sampler_output(
logprobs_tensor: Optional[torch.Tensor],
cache: Optional[PythonizationCache],
) -> None:
""" This function is only called when the output tensors are ready.
See {class}`ModelOutput`.
Modifies `output.outputs` and `pinned_sampled_token_buffer` in-place,
""" This function is only called when the output tensors are ready.
See [`ModelOutput`][vllm.worker.multi_step_model_runner.ModelOutput].
Modifies `output.outputs` and `pinned_sampled_token_buffer` in-place,
adding a Pythonized output data structure
({class}`CompletionSequenceGroupOutput`) for each {class}`SequenceGroup`.
([`CompletionSequenceGroupOutput`][vllm.sequence.CompletionSequenceGroupOutput])
for each [`SequenceGroup`][vllm.sequence.SequenceGroup].
Args:
model_input
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment