"vscode:/vscode.git/clone" did not exist on "19dcc02a72e3ed52e3bf95aae44ea1f40ce42ea0"
Unverified Commit 8f7bace7 authored by Cyrus Leung, committed by GitHub
Browse files

[Doc] Improve documentation for multimodal CLI args (#16960)


Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent e4d61442
......@@ -54,13 +54,15 @@ if TYPE_CHECKING:
from vllm.transformers_utils.tokenizer_group.base_tokenizer_group import (
BaseTokenizerGroup)
Config = TypeVar("Config", bound=DataclassInstance)
ConfigType = type[DataclassInstance]
else:
QuantizationConfig = None
Config = TypeVar("Config")
ConfigType = type
logger = init_logger(__name__)
ConfigT = TypeVar("ConfigT", bound=ConfigType)
# This value is chosen to have a balance between ITL and TTFT. Note it is
# not optimized for throughput.
_DEFAULT_MAX_NUM_BATCHED_TOKENS = 2048
......@@ -162,7 +164,7 @@ def get_attr_docs(cls: type[Any]) -> dict[str, str]:
return out
def config(cls: type[Config]) -> type[Config]:
def config(cls: ConfigT) -> ConfigT:
"""
A decorator that ensures all fields in a dataclass have default values
and that each field has a docstring.
......@@ -181,7 +183,7 @@ def config(cls: type[Config]) -> type[Config]:
return cls
def get_field(cls: type[Config], name: str) -> Field:
def get_field(cls: ConfigType, name: str) -> Field:
"""Get the default factory field of a dataclass by name. Used for getting
default factory fields in `EngineArgs`."""
if not is_dataclass(cls):
......@@ -2749,6 +2751,9 @@ class MultiModalConfig:
The maximum number of input items allowed per prompt for each modality.
This should be a JSON string that will be parsed into a dictionary.
Defaults to 1 (V0) or 999 (V1) for each modality.
For example, to allow up to 16 images and 2 videos per prompt:
``{"images": 16, "videos": 2}``
"""
def compute_hash(self) -> str:
......
......@@ -17,7 +17,7 @@ from typing_extensions import TypeIs
import vllm.envs as envs
from vllm import version
from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig,
Config, ConfigFormat, DecodingConfig, Device,
ConfigFormat, ConfigType, DecodingConfig, Device,
DeviceConfig, DistributedExecutorBackend, HfOverrides,
KVTransferConfig, LoadConfig, LoadFormat, LoRAConfig,
ModelConfig, ModelImpl, MultiModalConfig,
......@@ -304,7 +304,7 @@ class EngineArgs:
"""Check if the class is a custom type."""
return cls.__module__ != "builtins"
def get_kwargs(cls: type[Config]) -> dict[str, Any]:
def get_kwargs(cls: ConfigType) -> dict[str, Any]:
cls_docs = get_attr_docs(cls)
kwargs = {}
for field in fields(cls):
......@@ -678,13 +678,15 @@ class EngineArgs:
'--mm-processor-kwargs',
default=None,
type=json.loads,
help=('Overrides for the multimodal input mapping/processing, '
'e.g., image processor. For example: ``{"num_crops": 4}``.'))
help=('Overrides for the multi-modal processor obtained from '
'``AutoProcessor.from_pretrained``. The available overrides '
'depend on the model that is being run.'
'For example, for Phi-3-Vision: ``{"num_crops": 4}``.'))
parser.add_argument(
'--disable-mm-preprocessor-cache',
action='store_true',
help='If true, then disables caching of the multi-modal '
'preprocessor/mapper. (not recommended)')
help='If True, disable caching of the processed multi-modal '
'inputs.')
# LoRA related configs
parser.add_argument('--enable-lora',
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment