Unverified commit 1d0c0d20, authored by Isotr0py, committed by GitHub
Browse files

[Misc] Lazy import registered processors (#36024)


Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
Co-authored-by: Roger Wang <hey@rogerw.io>
parent fcb73f30
...@@ -1020,18 +1020,15 @@ _MULTIMODAL_EXAMPLE_MODELS = { ...@@ -1020,18 +1020,15 @@ _MULTIMODAL_EXAMPLE_MODELS = {
min_transformers_version="4.57", min_transformers_version="4.57",
), ),
"Qwen3ASRForConditionalGeneration": _HfExamplesInfo( "Qwen3ASRForConditionalGeneration": _HfExamplesInfo(
"Qwen/Qwen3-ASR-1.7B", "Qwen/Qwen3-ASR-0.6B",
max_model_len=4096, max_model_len=4096,
min_transformers_version="4.57", min_transformers_version="4.57",
is_available_online=False,
), ),
"Qwen3ASRRealtimeGeneration": _HfExamplesInfo( "Qwen3ASRRealtimeGeneration": _HfExamplesInfo(
"Qwen/Qwen3-ASR-1.7B", "Qwen/Qwen3-ASR-0.6B",
max_model_len=4096, max_model_len=4096,
min_transformers_version="4.57", min_transformers_version="4.57",
enforce_eager=True,
hf_overrides={"architectures": ["Qwen3ASRRealtimeGeneration"]}, hf_overrides={"architectures": ["Qwen3ASRRealtimeGeneration"]},
is_available_online=False,
), ),
"RForConditionalGeneration": _HfExamplesInfo("YannQi/R-4B", trust_remote_code=True), "RForConditionalGeneration": _HfExamplesInfo("YannQi/R-4B", trust_remote_code=True),
"SkyworkR1VChatModel": _HfExamplesInfo( "SkyworkR1VChatModel": _HfExamplesInfo(
......
...@@ -48,7 +48,6 @@ from vllm.transformers_utils.configs.deepseek_vl2 import ( ...@@ -48,7 +48,6 @@ from vllm.transformers_utils.configs.deepseek_vl2 import (
MlpProjectorConfig, MlpProjectorConfig,
VisionEncoderConfig, VisionEncoderConfig,
) )
from vllm.transformers_utils.processors.deepseek_vl2 import DeepseekVLV2Processor
from vllm.utils.tensor_schema import TensorSchema, TensorShape from vllm.utils.tensor_schema import TensorSchema, TensorShape
from vllm.utils.torch_utils import set_default_torch_dtype from vllm.utils.torch_utils import set_default_torch_dtype
...@@ -160,7 +159,7 @@ class DeepseekVL2ProcessingInfo(BaseProcessingInfo): ...@@ -160,7 +159,7 @@ class DeepseekVL2ProcessingInfo(BaseProcessingInfo):
return self.ctx.get_hf_config(DeepseekVLV2Config) return self.ctx.get_hf_config(DeepseekVLV2Config)
def get_hf_processor(self, **kwargs: object): def get_hf_processor(self, **kwargs: object):
return self.ctx.get_hf_processor(DeepseekVLV2Processor, **kwargs) return self.ctx.get_hf_processor(**kwargs)
def get_supported_mm_limits(self) -> Mapping[str, int | None]: def get_supported_mm_limits(self) -> Mapping[str, int | None]:
return {"image": None} return {"image": None}
......
...@@ -41,7 +41,7 @@ from vllm.multimodal.processing import ( ...@@ -41,7 +41,7 @@ from vllm.multimodal.processing import (
PromptUpdateDetails, PromptUpdateDetails,
) )
from vllm.transformers_utils.processor import cached_processor_from_config from vllm.transformers_utils.processor import cached_processor_from_config
from vllm.transformers_utils.processors.fireredasr2_processor import ( from vllm.transformers_utils.processors.fireredasr2 import (
FireRedASR2FeatureExtractor, FireRedASR2FeatureExtractor,
) )
from vllm.utils.tensor_schema import TensorSchema, TensorShape from vllm.utils.tensor_schema import TensorSchema, TensorShape
......
...@@ -50,7 +50,7 @@ from vllm.multimodal.processing import ( ...@@ -50,7 +50,7 @@ from vllm.multimodal.processing import (
PromptUpdate, PromptUpdate,
) )
from vllm.transformers_utils.processor import cached_processor_from_config from vllm.transformers_utils.processor import cached_processor_from_config
from vllm.transformers_utils.processors.funasr_processor import FunASRFeatureExtractor from vllm.transformers_utils.processors.funasr import FunASRFeatureExtractor
from vllm.utils.tensor_schema import TensorSchema, TensorShape from vllm.utils.tensor_schema import TensorSchema, TensorShape
from .interfaces import ( from .interfaces import (
......
...@@ -20,7 +20,9 @@ from transformers.video_processing_utils import BaseVideoProcessor ...@@ -20,7 +20,9 @@ from transformers.video_processing_utils import BaseVideoProcessor
from typing_extensions import TypeVar from typing_extensions import TypeVar
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.transformers_utils import processors
from vllm.transformers_utils.gguf_utils import is_gguf from vllm.transformers_utils.gguf_utils import is_gguf
from vllm.transformers_utils.repo_utils import get_hf_file_to_dict
from vllm.transformers_utils.utils import convert_model_repo_to_path from vllm.transformers_utils.utils import convert_model_repo_to_path
from vllm.utils.func_utils import get_allowed_kwarg_only_overrides from vllm.utils.func_utils import get_allowed_kwarg_only_overrides
...@@ -139,6 +141,22 @@ def _merge_mm_kwargs( ...@@ -139,6 +141,22 @@ def _merge_mm_kwargs(
return allowed_kwargs return allowed_kwargs
def get_processor_cls_name_from_config(
    processor_name: str,
    revision: str | None = "main",
) -> str | None:
    """Return the ``processor_class`` entry declared in a model's config files.

    The candidate files are consulted in a fixed order and the first one
    that contains a ``processor_class`` key decides the result.

    Args:
        processor_name: Model identifier, passed through unchanged to
            ``get_hf_file_to_dict``.
        revision: Revision forwarded to ``get_hf_file_to_dict``.

    Returns:
        The value stored under ``processor_class`` in the first matching
        file, or ``None`` when no candidate file provides that key.
    """
    candidate_files = (
        "processor_config.json",
        "preprocessor_config.json",
        "tokenizer_config.json",
    )
    for filename in candidate_files:
        loaded = get_hf_file_to_dict(filename, processor_name, revision=revision)
        # Skip files that could not be loaded, are empty, or lack the key.
        if not loaded or "processor_class" not in loaded:
            continue
        return loaded["processor_class"]
    return None
def get_processor( def get_processor(
processor_name: str, processor_name: str,
*args: Any, *args: Any,
...@@ -152,8 +170,20 @@ def get_processor( ...@@ -152,8 +170,20 @@ def get_processor(
revision = "main" revision = "main"
try: try:
processor_name = convert_model_repo_to_path(processor_name) processor_name = convert_model_repo_to_path(processor_name)
registered_cls_name = get_processor_cls_name_from_config(
processor_name, revision=revision
)
registered_processor_cls = (
getattr(processors, registered_cls_name, None)
if registered_cls_name
else None
)
registered_processor_cls = cast(type[_P] | None, registered_processor_cls)
# Use registered processor class when it's available
# and explicit processor_cls is not set.
if isinstance(processor_cls, tuple) or processor_cls == ProcessorMixin: if isinstance(processor_cls, tuple) or processor_cls == ProcessorMixin:
processor = AutoProcessor.from_pretrained( _processor_cls = registered_processor_cls or AutoProcessor
processor = _processor_cls.from_pretrained(
processor_name, processor_name,
*args, *args,
revision=revision, revision=revision,
......
...@@ -8,16 +8,20 @@ reasons: ...@@ -8,16 +8,20 @@ reasons:
- There is a need to override the existing processor to support vLLM. - There is a need to override the existing processor to support vLLM.
""" """
from vllm.transformers_utils.processors.bagel import BagelProcessor import importlib
from vllm.transformers_utils.processors.deepseek_vl2 import DeepseekVLV2Processor
from vllm.transformers_utils.processors.fireredasr2_processor import ( _CLASS_TO_MODULE: dict[str, str] = {
FireRedASR2Processor, "BagelProcessor": "vllm.transformers_utils.processors.bagel",
) "DeepseekVLV2Processor": "vllm.transformers_utils.processors.deepseek_vl2",
from vllm.transformers_utils.processors.funasr_processor import FunASRProcessor "FireRedASR2Processor": "vllm.transformers_utils.processors.fireredasr2",
from vllm.transformers_utils.processors.hunyuan_vl import HunYuanVLProcessor "FunASRProcessor": "vllm.transformers_utils.processors.funasr",
from vllm.transformers_utils.processors.hunyuan_vl_image import HunYuanVLImageProcessor "HunYuanVLProcessor": "vllm.transformers_utils.processors.hunyuan_vl",
from vllm.transformers_utils.processors.ovis import OvisProcessor "HunYuanVLImageProcessor": "vllm.transformers_utils.processors.hunyuan_vl_image",
from vllm.transformers_utils.processors.ovis2_5 import Ovis2_5Processor "OvisProcessor": "vllm.transformers_utils.processors.ovis",
"Ovis2_5Processor": "vllm.transformers_utils.processors.ovis2_5",
"Qwen3ASRProcessor": "vllm.transformers_utils.processors.qwen3_asr",
}
__all__ = [ __all__ = [
"BagelProcessor", "BagelProcessor",
...@@ -28,4 +32,18 @@ __all__ = [ ...@@ -28,4 +32,18 @@ __all__ = [
"HunYuanVLImageProcessor", "HunYuanVLImageProcessor",
"OvisProcessor", "OvisProcessor",
"Ovis2_5Processor", "Ovis2_5Processor",
"Qwen3ASRProcessor",
] ]
def __getattr__(name: str):
    """Lazily resolve a registered processor class by attribute name.

    Looks ``name`` up in ``_CLASS_TO_MODULE``, imports the mapped module on
    demand and returns the attribute of the same name from that module.

    Raises:
        AttributeError: If ``name`` is not a key of ``_CLASS_TO_MODULE``.
    """
    target_module = _CLASS_TO_MODULE.get(name)
    if target_module is None:
        raise AttributeError(f"module 'processors' has no attribute '{name}'")
    # The import is deferred until the class is first requested.
    return getattr(importlib.import_module(target_module), name)
def __dir__():
    """Return the names listed in ``__all__`` in sorted order.

    Returns:
        A new sorted list of the names in ``__all__``.
    """
    # sorted() accepts any iterable and always returns a new list, so the
    # intermediate list() copy was redundant.
    return sorted(__all__)
...@@ -3,7 +3,6 @@ ...@@ -3,7 +3,6 @@
# Copyright 2025 Bytedance Ltd. and/or its affiliates. # Copyright 2025 Bytedance Ltd. and/or its affiliates.
"""BAGEL processor for image and text inputs.""" """BAGEL processor for image and text inputs."""
from transformers import AutoProcessor
from transformers.feature_extraction_utils import BatchFeature from transformers.feature_extraction_utils import BatchFeature
from transformers.image_utils import ImageInput from transformers.image_utils import ImageInput
from transformers.processing_utils import ProcessingKwargs, ProcessorMixin, Unpack from transformers.processing_utils import ProcessingKwargs, ProcessorMixin, Unpack
...@@ -79,6 +78,3 @@ class BagelProcessor(ProcessorMixin): ...@@ -79,6 +78,3 @@ class BagelProcessor(ProcessorMixin):
tokenizer_input_names = self.tokenizer.model_input_names tokenizer_input_names = self.tokenizer.model_input_names
image_processor_input_names = self.image_processor.model_input_names image_processor_input_names = self.image_processor.model_input_names
return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names)) return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names))
AutoProcessor.register("BagelProcessor", BagelProcessor)
...@@ -8,7 +8,7 @@ from typing import Literal ...@@ -8,7 +8,7 @@ from typing import Literal
import torch import torch
import torchvision.transforms as T import torchvision.transforms as T
from PIL import Image, ImageOps from PIL import Image, ImageOps
from transformers import AutoProcessor, BatchFeature, LlamaTokenizerFast from transformers import BatchFeature, LlamaTokenizerFast
from transformers.processing_utils import ProcessorMixin from transformers.processing_utils import ProcessorMixin
# TODO(Isotr0py): change modes for variants # TODO(Isotr0py): change modes for variants
...@@ -453,6 +453,3 @@ class DeepseekOCRProcessor(ProcessorMixin): ...@@ -453,6 +453,3 @@ class DeepseekOCRProcessor(ProcessorMixin):
num_image_tokens, num_image_tokens,
image_shapes, image_shapes,
) )
AutoProcessor.register("DeepseekOCRProcessor", DeepseekOCRProcessor)
...@@ -29,7 +29,7 @@ from typing import Any ...@@ -29,7 +29,7 @@ from typing import Any
import torch import torch
import torchvision.transforms as T import torchvision.transforms as T
from PIL import Image, ImageOps from PIL import Image, ImageOps
from transformers import AutoProcessor, BatchFeature, LlamaTokenizerFast from transformers import BatchFeature, LlamaTokenizerFast
from transformers.processing_utils import ProcessorMixin from transformers.processing_utils import ProcessorMixin
...@@ -401,6 +401,3 @@ class DeepseekVLV2Processor(ProcessorMixin): ...@@ -401,6 +401,3 @@ class DeepseekVLV2Processor(ProcessorMixin):
images_spatial_crop, images_spatial_crop,
num_image_tokens, num_image_tokens,
) )
AutoProcessor.register("DeepseekVLV2Processor", DeepseekVLV2Processor)
...@@ -8,7 +8,6 @@ import torch ...@@ -8,7 +8,6 @@ import torch
import torch.nn.functional as F import torch.nn.functional as F
from transformers import ( from transformers import (
AutoFeatureExtractor, AutoFeatureExtractor,
AutoProcessor,
BatchFeature, BatchFeature,
) )
from transformers.feature_extraction_sequence_utils import SequenceFeatureExtractor from transformers.feature_extraction_sequence_utils import SequenceFeatureExtractor
...@@ -345,4 +344,3 @@ class FireRedASR2Processor(ProcessorMixin): ...@@ -345,4 +344,3 @@ class FireRedASR2Processor(ProcessorMixin):
AutoFeatureExtractor.register( AutoFeatureExtractor.register(
"FireRedASR2FeatureExtractor", FireRedASR2FeatureExtractor "FireRedASR2FeatureExtractor", FireRedASR2FeatureExtractor
) )
AutoProcessor.register("FireRedASR2Processor", FireRedASR2Processor)
...@@ -9,7 +9,6 @@ import torchaudio.compliance.kaldi as kaldi ...@@ -9,7 +9,6 @@ import torchaudio.compliance.kaldi as kaldi
from torch.nn.utils.rnn import pad_sequence from torch.nn.utils.rnn import pad_sequence
from transformers import ( from transformers import (
AutoFeatureExtractor, AutoFeatureExtractor,
AutoProcessor,
BatchFeature, BatchFeature,
) )
from transformers.feature_extraction_sequence_utils import SequenceFeatureExtractor from transformers.feature_extraction_sequence_utils import SequenceFeatureExtractor
...@@ -503,4 +502,3 @@ class FunASRProcessor(ProcessorMixin): ...@@ -503,4 +502,3 @@ class FunASRProcessor(ProcessorMixin):
AutoFeatureExtractor.register("FunASRFeatureExtractor", FunASRFeatureExtractor) AutoFeatureExtractor.register("FunASRFeatureExtractor", FunASRFeatureExtractor)
AutoProcessor.register("FunASRProcessor", FunASRProcessor)
...@@ -5,7 +5,6 @@ ...@@ -5,7 +5,6 @@
import numpy as np import numpy as np
import torch import torch
from transformers import AutoProcessor
from transformers.feature_extraction_utils import BatchFeature from transformers.feature_extraction_utils import BatchFeature
from transformers.image_utils import ImageInput from transformers.image_utils import ImageInput
from transformers.processing_utils import ProcessorMixin from transformers.processing_utils import ProcessorMixin
...@@ -225,6 +224,3 @@ def split_image_into_patch_blocks( ...@@ -225,6 +224,3 @@ def split_image_into_patch_blocks(
patches = img.reshape(-1, 3, patch_size, patch_size) patches = img.reshape(-1, 3, patch_size, patch_size)
return patches return patches
AutoProcessor.register("HunYuanVLProcessor", HunYuanVLProcessor)
...@@ -26,7 +26,7 @@ from functools import cached_property ...@@ -26,7 +26,7 @@ from functools import cached_property
import PIL import PIL
import torch import torch
from transformers import AutoProcessor, BatchFeature from transformers import BatchFeature
from transformers.image_utils import ImageInput from transformers.image_utils import ImageInput
from transformers.processing_utils import ProcessingKwargs, ProcessorMixin, Unpack from transformers.processing_utils import ProcessingKwargs, ProcessorMixin, Unpack
from transformers.tokenization_utils_base import PreTokenizedInput, TextInput from transformers.tokenization_utils_base import PreTokenizedInput, TextInput
...@@ -453,6 +453,3 @@ class OvisProcessor(ProcessorMixin): ...@@ -453,6 +453,3 @@ class OvisProcessor(ProcessorMixin):
dict.fromkeys(tokenizer_input_names + image_processor_input_names) dict.fromkeys(tokenizer_input_names + image_processor_input_names)
) )
return names_from_processor + ["second_per_grid_ts"] return names_from_processor + ["second_per_grid_ts"]
AutoProcessor.register("OvisProcessor", OvisProcessor)
...@@ -6,7 +6,7 @@ from functools import cached_property ...@@ -6,7 +6,7 @@ from functools import cached_property
import numpy as np import numpy as np
import PIL import PIL
import torch import torch
from transformers import AutoProcessor, BatchFeature from transformers import BatchFeature
from transformers.image_utils import ImageInput from transformers.image_utils import ImageInput
from transformers.processing_utils import ProcessingKwargs, ProcessorMixin, Unpack from transformers.processing_utils import ProcessingKwargs, ProcessorMixin, Unpack
from transformers.tokenization_utils_base import PreTokenizedInput, TextInput from transformers.tokenization_utils_base import PreTokenizedInput, TextInput
...@@ -476,6 +476,3 @@ class Ovis2_5Processor(ProcessorMixin): ...@@ -476,6 +476,3 @@ class Ovis2_5Processor(ProcessorMixin):
visual_placeholders, visual_placeholders,
torch.tensor([[grid_t, grid_h, grid_w]]), torch.tensor([[grid_t, grid_h, grid_w]]),
) )
AutoProcessor.register("Ovis2_5Processor", Ovis2_5Processor)
...@@ -227,6 +227,3 @@ class Qwen3ASRProcessor(ProcessorMixin): ...@@ -227,6 +227,3 @@ class Qwen3ASRProcessor(ProcessorMixin):
+ ["feature_attention_mask"] + ["feature_attention_mask"]
) )
) )
AutoProcessor.register("Qwen3ASRProcessor", Qwen3ASRProcessor)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment