Commit ce3a3e87 (unverified), authored by Lianmin Zheng, committed by GitHub
Browse files

Move multimodal processors into a separate folder (#7581)

parent 41650b0d
# COPIED FROM DeepGEMM
def align(x: int, y: int) -> int:
    """Round ``x`` up to a multiple of ``y``.

    The result is ``ceil_div(x, y) * y``; for positive ``y`` this is the
    smallest multiple of ``y`` that is greater than or equal to ``x``.

    Args:
        x: Value to round up.
        y: Alignment step.

    Returns:
        ``ceil_div(x, y)`` multiplied by ``y``.
    """
    # Delegate the rounding to ceil_div so both helpers share one formula.
    return ceil_div(x, y) * y
# COPIED FROM DeepGEMM
def ceil_div(x: int, y: int) -> int:
    """Divide ``x`` by ``y`` and round up, using integer arithmetic only.

    Evaluates ``(x + y - 1) // y``. For positive ``y`` this equals the
    ceiling of ``x / y`` without going through floating point.

    Args:
        x: Dividend.
        y: Divisor.

    Returns:
        The value of ``(x + y - 1) // y``.

    Raises:
        ZeroDivisionError: If ``y`` is zero (from the floor division).
    """
    # Adding y - 1 before flooring bumps any non-exact quotient up by one.
    return (x + y - 1) // y
...@@ -19,7 +19,7 @@ from transformers import ( ...@@ -19,7 +19,7 @@ from transformers import (
from transformers.image_utils import to_numpy_array from transformers.image_utils import to_numpy_array
from sglang.srt.configs.utils import register_image_processor, register_processor from sglang.srt.configs.utils import register_image_processor, register_processor
from sglang.srt.mm_utils import expand2square from sglang.srt.multimodal.mm_utils import expand2square
class DictToObject(dict): class DictToObject(dict):
......
...@@ -4,9 +4,8 @@ from typing import List, Optional ...@@ -4,9 +4,8 @@ from typing import List, Optional
import torch import torch
import triton import triton
from sglang.math_utils import ceil_div
from sglang.srt.layers.quantization.fp8_kernel import per_token_group_quant_fp8 from sglang.srt.layers.quantization.fp8_kernel import per_token_group_quant_fp8
from sglang.srt.utils import dispose_tensor, is_cuda from sglang.srt.utils import ceil_div, dispose_tensor, is_cuda
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
......
...@@ -12,7 +12,6 @@ import torch ...@@ -12,7 +12,6 @@ import torch
import triton import triton
import triton.language as tl import triton.language as tl
from sglang.math_utils import ceil_div
from sglang.srt.layers.moe.topk import select_experts from sglang.srt.layers.moe.topk import select_experts
from sglang.srt.layers.quantization.fp8_kernel import ( from sglang.srt.layers.quantization.fp8_kernel import (
per_token_group_quant_fp8, per_token_group_quant_fp8,
...@@ -25,6 +24,7 @@ from sglang.srt.layers.quantization.int8_kernel import ( ...@@ -25,6 +24,7 @@ from sglang.srt.layers.quantization.int8_kernel import (
sglang_per_token_group_quant_int8, sglang_per_token_group_quant_int8,
) )
from sglang.srt.utils import ( from sglang.srt.utils import (
ceil_div,
cpu_has_amx_support, cpu_has_amx_support,
direct_register_custom_op, direct_register_custom_op,
get_bool_env_var, get_bool_env_var,
...@@ -32,7 +32,6 @@ from sglang.srt.utils import ( ...@@ -32,7 +32,6 @@ from sglang.srt.utils import (
is_cpu, is_cpu,
is_cuda, is_cuda,
is_hip, is_hip,
log_info_on_rank0,
next_power_of_2, next_power_of_2,
) )
......
...@@ -23,9 +23,9 @@ import torch ...@@ -23,9 +23,9 @@ import torch
import triton import triton
import triton.language as tl import triton.language as tl
from sglang.math_utils import align
from sglang.srt.layers.quantization import deep_gemm_wrapper from sglang.srt.layers.quantization import deep_gemm_wrapper
from sglang.srt.utils import ( from sglang.srt.utils import (
align,
direct_register_custom_op, direct_register_custom_op,
get_device_core_count, get_device_core_count,
get_device_name, get_device_name,
......
from typing import Callable, List, Optional, Tuple from typing import Callable, List, Optional, Tuple
import einops
import torch import torch
from sglang.math_utils import align
from sglang.srt.layers.quantization import deep_gemm_wrapper from sglang.srt.layers.quantization import deep_gemm_wrapper
from sglang.srt.layers.quantization.fp8_kernel import sglang_per_token_group_quant_fp8 from sglang.srt.layers.quantization.fp8_kernel import sglang_per_token_group_quant_fp8
from sglang.srt.layers.utils import is_sm100_supported from sglang.srt.layers.utils import is_sm100_supported
...@@ -27,6 +25,7 @@ from sglang.srt.layers.quantization.fp8_kernel import ( ...@@ -27,6 +25,7 @@ from sglang.srt.layers.quantization.fp8_kernel import (
w8a8_block_fp8_matmul_triton, w8a8_block_fp8_matmul_triton,
) )
from sglang.srt.utils import ( from sglang.srt.utils import (
align,
get_bool_env_var, get_bool_env_var,
get_cuda_version, get_cuda_version,
get_device_capability, get_device_capability,
......
...@@ -22,7 +22,7 @@ from dataclasses import dataclass, field ...@@ -22,7 +22,7 @@ from dataclasses import dataclass, field
from enum import Enum from enum import Enum
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
from sglang.srt.mm_utils import has_valid_data from sglang.srt.multimodal.mm_utils import has_valid_data
# handle serialization of Image for pydantic # handle serialization of Image for pydantic
if TYPE_CHECKING: if TYPE_CHECKING:
......
...@@ -2,8 +2,6 @@ ...@@ -2,8 +2,6 @@
Multi-modality utils Multi-modality utils
""" """
import dataclasses
import logging
from abc import abstractmethod from abc import abstractmethod
from typing import Callable, List, Optional, Tuple from typing import Callable, List, Optional, Tuple
......
...@@ -5,9 +5,7 @@ import logging ...@@ -5,9 +5,7 @@ import logging
import pkgutil import pkgutil
from functools import lru_cache from functools import lru_cache
from sglang.srt.managers.multimodal_processors.base_processor import ( from sglang.srt.multimodal.processors.base_processor import BaseMultimodalProcessor
BaseMultimodalProcessor,
)
from sglang.srt.server_args import ServerArgs from sglang.srt.server_args import ServerArgs
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -29,7 +27,7 @@ def get_dummy_processor(): ...@@ -29,7 +27,7 @@ def get_dummy_processor():
@lru_cache() @lru_cache()
def import_processors(): def import_processors():
package_name = "sglang.srt.managers.multimodal_processors" package_name = "sglang.srt.multimodal.processors"
package = importlib.import_module(package_name) package = importlib.import_module(package_name)
for _, name, ispkg in pkgutil.iter_modules(package.__path__, package_name + "."): for _, name, ispkg in pkgutil.iter_modules(package.__path__, package_name + "."):
if not ispkg: if not ispkg:
......
...@@ -41,16 +41,16 @@ from sglang.srt.managers.schedule_batch import ( ...@@ -41,16 +41,16 @@ from sglang.srt.managers.schedule_batch import (
MultimodalDataItem, MultimodalDataItem,
MultimodalInputs, MultimodalInputs,
) )
from sglang.srt.mm_utils import (
get_anyres_image_grid_shape,
unpad_image,
unpad_image_shape,
)
from sglang.srt.model_executor.forward_batch_info import ForwardBatch from sglang.srt.model_executor.forward_batch_info import ForwardBatch
from sglang.srt.model_loader.weight_utils import default_weight_loader from sglang.srt.model_loader.weight_utils import default_weight_loader
from sglang.srt.models.llama import LlamaForCausalLM from sglang.srt.models.llama import LlamaForCausalLM
from sglang.srt.models.mistral import MistralForCausalLM from sglang.srt.models.mistral import MistralForCausalLM
from sglang.srt.models.qwen2 import Qwen2ForCausalLM from sglang.srt.models.qwen2 import Qwen2ForCausalLM
from sglang.srt.multimodal.mm_utils import (
get_anyres_image_grid_shape,
unpad_image,
unpad_image_shape,
)
from sglang.srt.utils import add_prefix, flatten_nested_list, logger from sglang.srt.utils import add_prefix, flatten_nested_list, logger
......
from typing import List, Union from typing import List, Union
from sglang.srt.managers.multimodal_processors.base_processor import (
BaseMultimodalProcessor,
)
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
from sglang.srt.models.clip import CLIPModel from sglang.srt.models.clip import CLIPModel
from sglang.srt.multimodal.processors.base_processor import BaseMultimodalProcessor
from sglang.srt.utils import load_image from sglang.srt.utils import load_image
......
...@@ -20,12 +20,12 @@ from typing import List, Union ...@@ -20,12 +20,12 @@ from typing import List, Union
import torch import torch
from sglang.srt.managers.multimodal_processors.base_processor import ( from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
from sglang.srt.models.deepseek_vl2 import DeepseekVL2ForCausalLM
from sglang.srt.multimodal.processors.base_processor import (
BaseMultimodalProcessor, BaseMultimodalProcessor,
MultimodalSpecialTokens, MultimodalSpecialTokens,
) )
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
from sglang.srt.models.deepseek_vl2 import DeepseekVL2ForCausalLM
class DeepseekVL2ImageProcessor(BaseMultimodalProcessor): class DeepseekVL2ImageProcessor(BaseMultimodalProcessor):
......
...@@ -4,11 +4,9 @@ from typing import Dict, List, Union ...@@ -4,11 +4,9 @@ from typing import Dict, List, Union
from sglang.srt.managers.multimodal_processor import ( from sglang.srt.managers.multimodal_processor import (
BaseMultimodalProcessor as SGLangBaseProcessor, BaseMultimodalProcessor as SGLangBaseProcessor,
) )
from sglang.srt.managers.multimodal_processors.base_processor import (
MultimodalSpecialTokens,
)
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
from sglang.srt.models.gemma3_mm import Gemma3ForConditionalGeneration from sglang.srt.models.gemma3_mm import Gemma3ForConditionalGeneration
from sglang.srt.multimodal.processors.base_processor import MultimodalSpecialTokens
# Copied from: https://github.com/huggingface/transformers/blob/main/src/transformers/models/gemma3/image_processing_gemma3_fast.py # Copied from: https://github.com/huggingface/transformers/blob/main/src/transformers/models/gemma3/image_processing_gemma3_fast.py
# will be removed in the future # will be removed in the future
......
...@@ -18,10 +18,8 @@ from typing import Dict, List, Optional, Union ...@@ -18,10 +18,8 @@ from typing import Dict, List, Optional, Union
from sglang.srt.managers.multimodal_processor import ( from sglang.srt.managers.multimodal_processor import (
BaseMultimodalProcessor as SGLangBaseProcessor, BaseMultimodalProcessor as SGLangBaseProcessor,
) )
from sglang.srt.managers.multimodal_processors.base_processor import (
MultimodalSpecialTokens,
)
from sglang.srt.models.gemma3n_mm import Gemma3nForConditionalGeneration from sglang.srt.models.gemma3n_mm import Gemma3nForConditionalGeneration
from sglang.srt.multimodal.processors.base_processor import MultimodalSpecialTokens
class Gemma3nSGLangProcessor(SGLangBaseProcessor): class Gemma3nSGLangProcessor(SGLangBaseProcessor):
......
...@@ -5,12 +5,12 @@ import torch ...@@ -5,12 +5,12 @@ import torch
from decord import VideoReader, cpu from decord import VideoReader, cpu
from PIL import Image from PIL import Image
from sglang.srt.managers.multimodal_processors.base_processor import ( from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
from sglang.srt.models.internvl import InternVLChatModel
from sglang.srt.multimodal.processors.base_processor import (
BaseMultimodalProcessor, BaseMultimodalProcessor,
MultimodalSpecialTokens, MultimodalSpecialTokens,
) )
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
from sglang.srt.models.internvl import InternVLChatModel
class InternVLImageProcessor(BaseMultimodalProcessor): class InternVLImageProcessor(BaseMultimodalProcessor):
......
from typing import List, Union from typing import List, Union
from sglang.srt.managers.multimodal_processors.base_processor import ( from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
from sglang.srt.models.deepseek_janus_pro import MultiModalityCausalLM
from sglang.srt.multimodal.processors.base_processor import (
BaseMultimodalProcessor, BaseMultimodalProcessor,
MultimodalSpecialTokens, MultimodalSpecialTokens,
) )
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
from sglang.srt.models.deepseek_janus_pro import MultiModalityCausalLM
class JanusProImageProcessor(BaseMultimodalProcessor): class JanusProImageProcessor(BaseMultimodalProcessor):
......
...@@ -3,14 +3,12 @@ from typing import Any, Dict, List, Optional, Union ...@@ -3,14 +3,12 @@ from typing import Any, Dict, List, Optional, Union
import torch import torch
from sglang.srt.managers.multimodal_processors.base_processor import (
BaseMultimodalProcessor as SGLangBaseProcessor,
)
from sglang.srt.managers.multimodal_processors.base_processor import (
MultimodalSpecialTokens,
)
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
from sglang.srt.models.kimi_vl import KimiVLForConditionalGeneration from sglang.srt.models.kimi_vl import KimiVLForConditionalGeneration
from sglang.srt.multimodal.processors.base_processor import (
BaseMultimodalProcessor as SGLangBaseProcessor,
)
from sglang.srt.multimodal.processors.base_processor import MultimodalSpecialTokens
# Compatible with KimiVLForConditionalGeneration # Compatible with KimiVLForConditionalGeneration
......
...@@ -7,11 +7,7 @@ from transformers.models.auto.processing_auto import ( ...@@ -7,11 +7,7 @@ from transformers.models.auto.processing_auto import (
) )
import sglang.srt.managers.multimodal_processor as sgl_mm_processor_utils import sglang.srt.managers.multimodal_processor as sgl_mm_processor_utils
from sglang.srt.managers.multimodal_processors.base_processor import (
BaseMultimodalProcessor,
)
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
from sglang.srt.mm_utils import expand2square, process_anyres_image
from sglang.srt.models.llava import ( from sglang.srt.models.llava import (
LlavaForConditionalGeneration, LlavaForConditionalGeneration,
LlavaLlamaForCausalLM, LlavaLlamaForCausalLM,
...@@ -20,6 +16,8 @@ from sglang.srt.models.llava import ( ...@@ -20,6 +16,8 @@ from sglang.srt.models.llava import (
) )
from sglang.srt.models.llavavid import LlavaVidForCausalLM from sglang.srt.models.llavavid import LlavaVidForCausalLM
from sglang.srt.models.mistral import Mistral3ForConditionalGeneration from sglang.srt.models.mistral import Mistral3ForConditionalGeneration
from sglang.srt.multimodal.mm_utils import expand2square, process_anyres_image
from sglang.srt.multimodal.processors.base_processor import BaseMultimodalProcessor
from sglang.srt.utils import load_image, logger from sglang.srt.utils import load_image, logger
from sglang.utils import get_exception_traceback from sglang.utils import get_exception_traceback
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment