Unverified commit ce3a3e87, authored by Lianmin Zheng, committed by GitHub
Browse files

Move multimodal processors into a separate folder (#7581)

parent 41650b0d
# COPIED FROM DeepGEMM
def align(x: int, y: int) -> int:
    """Round ``x`` up to a multiple of ``y``.

    The count of ``y``-sized chunks needed to cover ``x`` comes from
    ``ceil_div``; multiplying that count by ``y`` gives the aligned value.
    For positive ``y`` this is the smallest multiple of ``y`` that is
    greater than or equal to ``x``.
    """
    num_chunks = ceil_div(x, y)
    return num_chunks * y
# COPIED FROM DeepGEMM
def ceil_div(x: int, y: int) -> int:
    """Return ``x`` divided by ``y``, rounded up to an integer.

    Implemented as negated floor division, which yields the true ceiling
    for any non-zero integer ``y``. The ``(x + y - 1) // y`` formulation
    is only correct when ``y`` is positive; for positive ``y`` both forms
    return the same value, so existing callers are unaffected.

    Args:
        x: Dividend.
        y: Divisor; must be non-zero.

    Returns:
        The smallest integer greater than or equal to ``x / y``.

    Raises:
        ZeroDivisionError: If ``y`` is zero.
    """
    # Floor division rounds toward negative infinity, so negating the
    # dividend before and the quotient after turns the floor into a ceiling.
    return -(-x // y)
......@@ -19,7 +19,7 @@ from transformers import (
from transformers.image_utils import to_numpy_array
from sglang.srt.configs.utils import register_image_processor, register_processor
from sglang.srt.mm_utils import expand2square
from sglang.srt.multimodal.mm_utils import expand2square
class DictToObject(dict):
......
......@@ -4,9 +4,8 @@ from typing import List, Optional
import torch
import triton
from sglang.math_utils import ceil_div
from sglang.srt.layers.quantization.fp8_kernel import per_token_group_quant_fp8
from sglang.srt.utils import dispose_tensor, is_cuda
from sglang.srt.utils import ceil_div, dispose_tensor, is_cuda
logger = logging.getLogger(__name__)
......
......@@ -12,7 +12,6 @@ import torch
import triton
import triton.language as tl
from sglang.math_utils import ceil_div
from sglang.srt.layers.moe.topk import select_experts
from sglang.srt.layers.quantization.fp8_kernel import (
per_token_group_quant_fp8,
......@@ -25,6 +24,7 @@ from sglang.srt.layers.quantization.int8_kernel import (
sglang_per_token_group_quant_int8,
)
from sglang.srt.utils import (
ceil_div,
cpu_has_amx_support,
direct_register_custom_op,
get_bool_env_var,
......@@ -32,7 +32,6 @@ from sglang.srt.utils import (
is_cpu,
is_cuda,
is_hip,
log_info_on_rank0,
next_power_of_2,
)
......
......@@ -23,9 +23,9 @@ import torch
import triton
import triton.language as tl
from sglang.math_utils import align
from sglang.srt.layers.quantization import deep_gemm_wrapper
from sglang.srt.utils import (
align,
direct_register_custom_op,
get_device_core_count,
get_device_name,
......
from typing import Callable, List, Optional, Tuple
import einops
import torch
from sglang.math_utils import align
from sglang.srt.layers.quantization import deep_gemm_wrapper
from sglang.srt.layers.quantization.fp8_kernel import sglang_per_token_group_quant_fp8
from sglang.srt.layers.utils import is_sm100_supported
......@@ -27,6 +25,7 @@ from sglang.srt.layers.quantization.fp8_kernel import (
w8a8_block_fp8_matmul_triton,
)
from sglang.srt.utils import (
align,
get_bool_env_var,
get_cuda_version,
get_device_capability,
......
......@@ -22,7 +22,7 @@ from dataclasses import dataclass, field
from enum import Enum
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
from sglang.srt.mm_utils import has_valid_data
from sglang.srt.multimodal.mm_utils import has_valid_data
# handle serialization of Image for pydantic
if TYPE_CHECKING:
......
......@@ -2,8 +2,6 @@
Multi-modality utils
"""
import dataclasses
import logging
from abc import abstractmethod
from typing import Callable, List, Optional, Tuple
......
......@@ -5,9 +5,7 @@ import logging
import pkgutil
from functools import lru_cache
from sglang.srt.managers.multimodal_processors.base_processor import (
BaseMultimodalProcessor,
)
from sglang.srt.multimodal.processors.base_processor import BaseMultimodalProcessor
from sglang.srt.server_args import ServerArgs
logger = logging.getLogger(__name__)
......@@ -29,7 +27,7 @@ def get_dummy_processor():
@lru_cache()
def import_processors():
package_name = "sglang.srt.managers.multimodal_processors"
package_name = "sglang.srt.multimodal.processors"
package = importlib.import_module(package_name)
for _, name, ispkg in pkgutil.iter_modules(package.__path__, package_name + "."):
if not ispkg:
......
......@@ -41,16 +41,16 @@ from sglang.srt.managers.schedule_batch import (
MultimodalDataItem,
MultimodalInputs,
)
from sglang.srt.mm_utils import (
get_anyres_image_grid_shape,
unpad_image,
unpad_image_shape,
)
from sglang.srt.model_executor.forward_batch_info import ForwardBatch
from sglang.srt.model_loader.weight_utils import default_weight_loader
from sglang.srt.models.llama import LlamaForCausalLM
from sglang.srt.models.mistral import MistralForCausalLM
from sglang.srt.models.qwen2 import Qwen2ForCausalLM
from sglang.srt.multimodal.mm_utils import (
get_anyres_image_grid_shape,
unpad_image,
unpad_image_shape,
)
from sglang.srt.utils import add_prefix, flatten_nested_list, logger
......
from typing import List, Union
from sglang.srt.managers.multimodal_processors.base_processor import (
BaseMultimodalProcessor,
)
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
from sglang.srt.models.clip import CLIPModel
from sglang.srt.multimodal.processors.base_processor import BaseMultimodalProcessor
from sglang.srt.utils import load_image
......
......@@ -20,12 +20,12 @@ from typing import List, Union
import torch
from sglang.srt.managers.multimodal_processors.base_processor import (
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
from sglang.srt.models.deepseek_vl2 import DeepseekVL2ForCausalLM
from sglang.srt.multimodal.processors.base_processor import (
BaseMultimodalProcessor,
MultimodalSpecialTokens,
)
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
from sglang.srt.models.deepseek_vl2 import DeepseekVL2ForCausalLM
class DeepseekVL2ImageProcessor(BaseMultimodalProcessor):
......
......@@ -4,11 +4,9 @@ from typing import Dict, List, Union
from sglang.srt.managers.multimodal_processor import (
BaseMultimodalProcessor as SGLangBaseProcessor,
)
from sglang.srt.managers.multimodal_processors.base_processor import (
MultimodalSpecialTokens,
)
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
from sglang.srt.models.gemma3_mm import Gemma3ForConditionalGeneration
from sglang.srt.multimodal.processors.base_processor import MultimodalSpecialTokens
# Copied from: https://github.com/huggingface/transformers/blob/main/src/transformers/models/gemma3/image_processing_gemma3_fast.py
# will be removed in the future
......
......@@ -18,10 +18,8 @@ from typing import Dict, List, Optional, Union
from sglang.srt.managers.multimodal_processor import (
BaseMultimodalProcessor as SGLangBaseProcessor,
)
from sglang.srt.managers.multimodal_processors.base_processor import (
MultimodalSpecialTokens,
)
from sglang.srt.models.gemma3n_mm import Gemma3nForConditionalGeneration
from sglang.srt.multimodal.processors.base_processor import MultimodalSpecialTokens
class Gemma3nSGLangProcessor(SGLangBaseProcessor):
......
......@@ -5,12 +5,12 @@ import torch
from decord import VideoReader, cpu
from PIL import Image
from sglang.srt.managers.multimodal_processors.base_processor import (
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
from sglang.srt.models.internvl import InternVLChatModel
from sglang.srt.multimodal.processors.base_processor import (
BaseMultimodalProcessor,
MultimodalSpecialTokens,
)
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
from sglang.srt.models.internvl import InternVLChatModel
class InternVLImageProcessor(BaseMultimodalProcessor):
......
from typing import List, Union
from sglang.srt.managers.multimodal_processors.base_processor import (
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
from sglang.srt.models.deepseek_janus_pro import MultiModalityCausalLM
from sglang.srt.multimodal.processors.base_processor import (
BaseMultimodalProcessor,
MultimodalSpecialTokens,
)
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
from sglang.srt.models.deepseek_janus_pro import MultiModalityCausalLM
class JanusProImageProcessor(BaseMultimodalProcessor):
......
......@@ -3,14 +3,12 @@ from typing import Any, Dict, List, Optional, Union
import torch
from sglang.srt.managers.multimodal_processors.base_processor import (
BaseMultimodalProcessor as SGLangBaseProcessor,
)
from sglang.srt.managers.multimodal_processors.base_processor import (
MultimodalSpecialTokens,
)
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
from sglang.srt.models.kimi_vl import KimiVLForConditionalGeneration
from sglang.srt.multimodal.processors.base_processor import (
BaseMultimodalProcessor as SGLangBaseProcessor,
)
from sglang.srt.multimodal.processors.base_processor import MultimodalSpecialTokens
# Compatible with KimiVLForConditionalGeneration
......
......@@ -7,11 +7,7 @@ from transformers.models.auto.processing_auto import (
)
import sglang.srt.managers.multimodal_processor as sgl_mm_processor_utils
from sglang.srt.managers.multimodal_processors.base_processor import (
BaseMultimodalProcessor,
)
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
from sglang.srt.mm_utils import expand2square, process_anyres_image
from sglang.srt.models.llava import (
LlavaForConditionalGeneration,
LlavaLlamaForCausalLM,
......@@ -20,6 +16,8 @@ from sglang.srt.models.llava import (
)
from sglang.srt.models.llavavid import LlavaVidForCausalLM
from sglang.srt.models.mistral import Mistral3ForConditionalGeneration
from sglang.srt.multimodal.mm_utils import expand2square, process_anyres_image
from sglang.srt.multimodal.processors.base_processor import BaseMultimodalProcessor
from sglang.srt.utils import load_image, logger
from sglang.utils import get_exception_traceback
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment