Unverified commit 1d0c0d20, authored by Isotr0py, committed by GitHub
Browse files

[Misc] Lazy import registered processors (#36024)


Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
Co-authored-by: Roger Wang <hey@rogerw.io>
parent fcb73f30
......@@ -1020,18 +1020,15 @@ _MULTIMODAL_EXAMPLE_MODELS = {
min_transformers_version="4.57",
),
"Qwen3ASRForConditionalGeneration": _HfExamplesInfo(
"Qwen/Qwen3-ASR-1.7B",
"Qwen/Qwen3-ASR-0.6B",
max_model_len=4096,
min_transformers_version="4.57",
is_available_online=False,
),
"Qwen3ASRRealtimeGeneration": _HfExamplesInfo(
"Qwen/Qwen3-ASR-1.7B",
"Qwen/Qwen3-ASR-0.6B",
max_model_len=4096,
min_transformers_version="4.57",
enforce_eager=True,
hf_overrides={"architectures": ["Qwen3ASRRealtimeGeneration"]},
is_available_online=False,
),
"RForConditionalGeneration": _HfExamplesInfo("YannQi/R-4B", trust_remote_code=True),
"SkyworkR1VChatModel": _HfExamplesInfo(
......
......@@ -48,7 +48,6 @@ from vllm.transformers_utils.configs.deepseek_vl2 import (
MlpProjectorConfig,
VisionEncoderConfig,
)
from vllm.transformers_utils.processors.deepseek_vl2 import DeepseekVLV2Processor
from vllm.utils.tensor_schema import TensorSchema, TensorShape
from vllm.utils.torch_utils import set_default_torch_dtype
......@@ -160,7 +159,7 @@ class DeepseekVL2ProcessingInfo(BaseProcessingInfo):
return self.ctx.get_hf_config(DeepseekVLV2Config)
def get_hf_processor(self, **kwargs: object):
return self.ctx.get_hf_processor(DeepseekVLV2Processor, **kwargs)
return self.ctx.get_hf_processor(**kwargs)
def get_supported_mm_limits(self) -> Mapping[str, int | None]:
return {"image": None}
......
......@@ -41,7 +41,7 @@ from vllm.multimodal.processing import (
PromptUpdateDetails,
)
from vllm.transformers_utils.processor import cached_processor_from_config
from vllm.transformers_utils.processors.fireredasr2_processor import (
from vllm.transformers_utils.processors.fireredasr2 import (
FireRedASR2FeatureExtractor,
)
from vllm.utils.tensor_schema import TensorSchema, TensorShape
......
......@@ -50,7 +50,7 @@ from vllm.multimodal.processing import (
PromptUpdate,
)
from vllm.transformers_utils.processor import cached_processor_from_config
from vllm.transformers_utils.processors.funasr_processor import FunASRFeatureExtractor
from vllm.transformers_utils.processors.funasr import FunASRFeatureExtractor
from vllm.utils.tensor_schema import TensorSchema, TensorShape
from .interfaces import (
......
......@@ -20,7 +20,9 @@ from transformers.video_processing_utils import BaseVideoProcessor
from typing_extensions import TypeVar
from vllm.logger import init_logger
from vllm.transformers_utils import processors
from vllm.transformers_utils.gguf_utils import is_gguf
from vllm.transformers_utils.repo_utils import get_hf_file_to_dict
from vllm.transformers_utils.utils import convert_model_repo_to_path
from vllm.utils.func_utils import get_allowed_kwarg_only_overrides
......@@ -139,6 +141,22 @@ def _merge_mm_kwargs(
return allowed_kwargs
def get_processor_cls_name_from_config(
    processor_name: str,
    revision: str | None = "main",
) -> str | None:
    """Look up the ``processor_class`` entry declared by a model's configs.

    The candidate config files are probed in a fixed order and the first one
    that yields a non-empty mapping containing ``"processor_class"`` wins.

    Args:
        processor_name: Model identifier forwarded to ``get_hf_file_to_dict``
            (presumably a repo id or local path; confirm against that helper).
        revision: Revision forwarded to ``get_hf_file_to_dict``.

    Returns:
        The value stored under ``"processor_class"`` in the first matching
        config, or ``None`` when no probed file declares one.
    """
    candidate_files = (
        "processor_config.json",
        "preprocessor_config.json",
        "tokenizer_config.json",
    )
    for candidate in candidate_files:
        loaded = get_hf_file_to_dict(candidate, processor_name, revision=revision)
        # Skip files that are missing/empty or do not name a processor class.
        if not loaded or "processor_class" not in loaded:
            continue
        return loaded["processor_class"]
    return None
def get_processor(
processor_name: str,
*args: Any,
......@@ -152,8 +170,20 @@ def get_processor(
revision = "main"
try:
processor_name = convert_model_repo_to_path(processor_name)
registered_cls_name = get_processor_cls_name_from_config(
processor_name, revision=revision
)
registered_processor_cls = (
getattr(processors, registered_cls_name, None)
if registered_cls_name
else None
)
registered_processor_cls = cast(type[_P] | None, registered_processor_cls)
# Use registered processor class when it's available
# and explicit processor_cls is not set.
if isinstance(processor_cls, tuple) or processor_cls == ProcessorMixin:
processor = AutoProcessor.from_pretrained(
_processor_cls = registered_processor_cls or AutoProcessor
processor = _processor_cls.from_pretrained(
processor_name,
*args,
revision=revision,
......
......@@ -8,16 +8,20 @@ reasons:
- There is a need to override the existing processor to support vLLM.
"""
from vllm.transformers_utils.processors.bagel import BagelProcessor
from vllm.transformers_utils.processors.deepseek_vl2 import DeepseekVLV2Processor
from vllm.transformers_utils.processors.fireredasr2_processor import (
FireRedASR2Processor,
)
from vllm.transformers_utils.processors.funasr_processor import FunASRProcessor
from vllm.transformers_utils.processors.hunyuan_vl import HunYuanVLProcessor
from vllm.transformers_utils.processors.hunyuan_vl_image import HunYuanVLImageProcessor
from vllm.transformers_utils.processors.ovis import OvisProcessor
from vllm.transformers_utils.processors.ovis2_5 import Ovis2_5Processor
import importlib
# Package that hosts every processor submodule listed below.
_PROCESSORS_PACKAGE = "vllm.transformers_utils.processors"

# Maps each lazily-exported processor class name to the fully-qualified
# module that defines it; consulted by the module-level ``__getattr__``.
_CLASS_TO_MODULE: dict[str, str] = {
    class_name: f"{_PROCESSORS_PACKAGE}.{submodule}"
    for class_name, submodule in (
        ("BagelProcessor", "bagel"),
        ("DeepseekVLV2Processor", "deepseek_vl2"),
        ("FireRedASR2Processor", "fireredasr2"),
        ("FunASRProcessor", "funasr"),
        ("HunYuanVLProcessor", "hunyuan_vl"),
        ("HunYuanVLImageProcessor", "hunyuan_vl_image"),
        ("OvisProcessor", "ovis"),
        ("Ovis2_5Processor", "ovis2_5"),
        ("Qwen3ASRProcessor", "qwen3_asr"),
    )
}
__all__ = [
"BagelProcessor",
......@@ -28,4 +32,18 @@ __all__ = [
"HunYuanVLImageProcessor",
"OvisProcessor",
"Ovis2_5Processor",
"Qwen3ASRProcessor",
]
def __getattr__(name: str):
    """Lazily resolve a registered processor class on attribute access.

    This is the module-level attribute hook (PEP 562): Python calls it only
    when ``name`` is not found in the module namespace through normal lookup.

    Args:
        name: Attribute being looked up on this module.

    Returns:
        The attribute called ``name`` taken from the module that
        ``_CLASS_TO_MODULE`` maps it to.

    Raises:
        AttributeError: If ``name`` is not a key of ``_CLASS_TO_MODULE``.
    """
    module_name = _CLASS_TO_MODULE.get(name)
    if module_name is None:
        # Mirror the interpreter's own message, using the real module name.
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    attr = getattr(importlib.import_module(module_name), name)
    # Cache in the module namespace so later lookups succeed directly and
    # never re-enter this hook.
    globals()[name] = attr
    return attr
def __dir__():
    """Return the sorted public names of this module, taken from ``__all__``.

    Module-level ``__dir__`` hook (PEP 562), so ``dir()`` on this module
    lists the lazily-exported names even before they have been imported.
    """
    # sorted() already returns a new list; no intermediate copy is needed.
    return sorted(__all__)
......@@ -3,7 +3,6 @@
# Copyright 2025 Bytedance Ltd. and/or its affiliates.
"""BAGEL processor for image and text inputs."""
from transformers import AutoProcessor
from transformers.feature_extraction_utils import BatchFeature
from transformers.image_utils import ImageInput
from transformers.processing_utils import ProcessingKwargs, ProcessorMixin, Unpack
......@@ -79,6 +78,3 @@ class BagelProcessor(ProcessorMixin):
tokenizer_input_names = self.tokenizer.model_input_names
image_processor_input_names = self.image_processor.model_input_names
return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names))
AutoProcessor.register("BagelProcessor", BagelProcessor)
......@@ -8,7 +8,7 @@ from typing import Literal
import torch
import torchvision.transforms as T
from PIL import Image, ImageOps
from transformers import AutoProcessor, BatchFeature, LlamaTokenizerFast
from transformers import BatchFeature, LlamaTokenizerFast
from transformers.processing_utils import ProcessorMixin
# TODO(Isotr0py): change modes for variants
......@@ -453,6 +453,3 @@ class DeepseekOCRProcessor(ProcessorMixin):
num_image_tokens,
image_shapes,
)
AutoProcessor.register("DeepseekOCRProcessor", DeepseekOCRProcessor)
......@@ -29,7 +29,7 @@ from typing import Any
import torch
import torchvision.transforms as T
from PIL import Image, ImageOps
from transformers import AutoProcessor, BatchFeature, LlamaTokenizerFast
from transformers import BatchFeature, LlamaTokenizerFast
from transformers.processing_utils import ProcessorMixin
......@@ -401,6 +401,3 @@ class DeepseekVLV2Processor(ProcessorMixin):
images_spatial_crop,
num_image_tokens,
)
AutoProcessor.register("DeepseekVLV2Processor", DeepseekVLV2Processor)
......@@ -8,7 +8,6 @@ import torch
import torch.nn.functional as F
from transformers import (
AutoFeatureExtractor,
AutoProcessor,
BatchFeature,
)
from transformers.feature_extraction_sequence_utils import SequenceFeatureExtractor
......@@ -345,4 +344,3 @@ class FireRedASR2Processor(ProcessorMixin):
AutoFeatureExtractor.register(
"FireRedASR2FeatureExtractor", FireRedASR2FeatureExtractor
)
AutoProcessor.register("FireRedASR2Processor", FireRedASR2Processor)
......@@ -9,7 +9,6 @@ import torchaudio.compliance.kaldi as kaldi
from torch.nn.utils.rnn import pad_sequence
from transformers import (
AutoFeatureExtractor,
AutoProcessor,
BatchFeature,
)
from transformers.feature_extraction_sequence_utils import SequenceFeatureExtractor
......@@ -503,4 +502,3 @@ class FunASRProcessor(ProcessorMixin):
AutoFeatureExtractor.register("FunASRFeatureExtractor", FunASRFeatureExtractor)
AutoProcessor.register("FunASRProcessor", FunASRProcessor)
......@@ -5,7 +5,6 @@
import numpy as np
import torch
from transformers import AutoProcessor
from transformers.feature_extraction_utils import BatchFeature
from transformers.image_utils import ImageInput
from transformers.processing_utils import ProcessorMixin
......@@ -225,6 +224,3 @@ def split_image_into_patch_blocks(
patches = img.reshape(-1, 3, patch_size, patch_size)
return patches
AutoProcessor.register("HunYuanVLProcessor", HunYuanVLProcessor)
......@@ -26,7 +26,7 @@ from functools import cached_property
import PIL
import torch
from transformers import AutoProcessor, BatchFeature
from transformers import BatchFeature
from transformers.image_utils import ImageInput
from transformers.processing_utils import ProcessingKwargs, ProcessorMixin, Unpack
from transformers.tokenization_utils_base import PreTokenizedInput, TextInput
......@@ -453,6 +453,3 @@ class OvisProcessor(ProcessorMixin):
dict.fromkeys(tokenizer_input_names + image_processor_input_names)
)
return names_from_processor + ["second_per_grid_ts"]
AutoProcessor.register("OvisProcessor", OvisProcessor)
......@@ -6,7 +6,7 @@ from functools import cached_property
import numpy as np
import PIL
import torch
from transformers import AutoProcessor, BatchFeature
from transformers import BatchFeature
from transformers.image_utils import ImageInput
from transformers.processing_utils import ProcessingKwargs, ProcessorMixin, Unpack
from transformers.tokenization_utils_base import PreTokenizedInput, TextInput
......@@ -476,6 +476,3 @@ class Ovis2_5Processor(ProcessorMixin):
visual_placeholders,
torch.tensor([[grid_t, grid_h, grid_w]]),
)
AutoProcessor.register("Ovis2_5Processor", Ovis2_5Processor)
......@@ -227,6 +227,3 @@ class Qwen3ASRProcessor(ProcessorMixin):
+ ["feature_attention_mask"]
)
)
AutoProcessor.register("Qwen3ASRProcessor", Qwen3ASRProcessor)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment