Unverified commit fdc4e1e5, authored by fzyzcjy and committed by GitHub
Browse files

Tiny move files to utils folder (#11166)

parent 04b86b3c
......@@ -29,7 +29,6 @@ from typing import List, Optional, Tuple, Union
import torch
import torch.distributed as dist
from sglang.srt import slow_rank_detector
from sglang.srt.configs.device_config import DeviceConfig
from sglang.srt.configs.load_config import LoadConfig, LoadFormat
from sglang.srt.configs.model_config import AttentionArch, ModelConfig
......@@ -115,7 +114,6 @@ from sglang.srt.offloader import (
get_offloader,
set_offloader,
)
from sglang.srt.patch_torch import monkey_patch_torch_reductions
from sglang.srt.sampling.sampling_batch_info import SamplingBatchInfo
from sglang.srt.server_args import ServerArgs
from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
......@@ -140,7 +138,9 @@ from sglang.srt.utils import (
monkey_patch_p2p_access_check,
monkey_patch_vllm_gguf_config,
set_cuda_arch,
slow_rank_detector,
)
from sglang.srt.utils.patch_torch import monkey_patch_torch_reductions
from sglang.srt.weight_sync.tensor_bucket import (
FlattenedTensorBucket,
FlattenedTensorMetadata,
......
......@@ -9,7 +9,6 @@ import torch.nn as nn
from transformers.activations import ACT2FN
from sglang.srt.configs import DotsOCRConfig
from sglang.srt.hf_transformers_utils import get_processor
from sglang.srt.layers.logits_processor import LogitsProcessor
from sglang.srt.layers.quantization.base_config import QuantizationConfig
from sglang.srt.layers.vocab_parallel_embedding import ParallelLMHead
......@@ -23,6 +22,7 @@ from sglang.srt.model_loader.weight_utils import default_weight_loader
from sglang.srt.models.dots_vlm_vit import DotsVisionTransformer
from sglang.srt.models.qwen2 import Qwen2ForCausalLM
from sglang.srt.utils import add_prefix
from sglang.srt.utils.hf_transformers_utils import get_processor
logger = logging.getLogger(__name__)
......
......@@ -23,7 +23,6 @@ import torch
from torch import nn
from transformers import Gemma3Config, PreTrainedModel
from sglang.srt.hf_transformers_utils import get_processor
from sglang.srt.layers.layernorm import Gemma3RMSNorm
from sglang.srt.layers.logits_processor import LogitsProcessor
from sglang.srt.layers.quantization.base_config import QuantizationConfig
......@@ -44,6 +43,7 @@ from sglang.srt.model_loader.weight_utils import (
from sglang.srt.models.gemma3_causal import Gemma3ForCausalLM
from sglang.srt.models.siglip import SiglipVisionModel
from sglang.srt.utils import add_prefix
from sglang.srt.utils.hf_transformers_utils import get_processor
logger = logging.getLogger(__name__)
......
......@@ -14,7 +14,6 @@ from transformers import (
)
from transformers.models.auto.modeling_auto import AutoModel
from sglang.srt.hf_transformers_utils import get_processor
from sglang.srt.layers.layernorm import RMSNorm
from sglang.srt.layers.linear import ColumnParallelLinear, RowParallelLinear
from sglang.srt.layers.logits_processor import LogitsProcessor
......@@ -38,6 +37,7 @@ from sglang.srt.model_loader.weight_utils import (
from sglang.srt.models.gemma3n_audio import Gemma3nAudioEncoder
from sglang.srt.models.gemma3n_causal import Gemma3nRMSNorm, Gemma3nTextModel
from sglang.srt.utils import add_prefix
from sglang.srt.utils.hf_transformers_utils import get_processor
logger = logging.getLogger(__name__)
......
......@@ -7,7 +7,6 @@ import torch.nn as nn
import torch.nn.functional as F
from transformers.models.glm4v.configuration_glm4v import Glm4vConfig, Glm4vVisionConfig
from sglang.srt.hf_transformers_utils import get_processor
from sglang.srt.layers.activation import SiluAndMul
from sglang.srt.layers.attention import vision_utils
from sglang.srt.layers.layernorm import RMSNorm
......@@ -28,6 +27,7 @@ from sglang.srt.models.qwen2_5_vl import (
Qwen2_5_VLForConditionalGeneration,
)
from sglang.srt.utils import add_prefix
from sglang.srt.utils.hf_transformers_utils import get_processor
logger = logging.getLogger(__name__)
......
......@@ -10,7 +10,6 @@ from sglang.srt.distributed import (
get_moe_expert_parallel_world_size,
get_tensor_model_parallel_world_size,
)
from sglang.srt.hf_transformers_utils import get_processor
from sglang.srt.layers.attention import vision_utils
from sglang.srt.layers.logits_processor import LogitsProcessor
from sglang.srt.layers.moe.fused_moe_triton import FusedMoE
......@@ -22,6 +21,7 @@ from sglang.srt.model_loader.weight_utils import default_weight_loader
from sglang.srt.models.glm4_moe import Glm4MoeModel
from sglang.srt.models.glm4v import Glm4vForConditionalGeneration, Glm4vVisionModel
from sglang.srt.utils import add_prefix, is_cuda, log_info_on_rank0
from sglang.srt.utils.hf_transformers_utils import get_processor
_is_cuda = is_cuda()
......
......@@ -40,7 +40,6 @@ from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import (
Qwen2_5_VisionRotaryEmbedding,
)
from sglang.srt.hf_transformers_utils import get_processor
from sglang.srt.layers.attention.vision import VisionAttention
from sglang.srt.layers.layernorm import RMSNorm
from sglang.srt.layers.linear import (
......@@ -61,6 +60,7 @@ from sglang.srt.model_executor.forward_batch_info import ForwardBatch
from sglang.srt.model_loader.weight_utils import default_weight_loader
from sglang.srt.models.qwen2 import Qwen2Model
from sglang.srt.utils import add_prefix
from sglang.srt.utils.hf_transformers_utils import get_processor
logger = logging.getLogger(__name__)
......
......@@ -39,7 +39,6 @@ from transformers.models.qwen2_audio.modeling_qwen2_audio import (
Qwen2AudioMultiModalProjector,
)
from sglang.srt.hf_transformers_utils import get_processor
from sglang.srt.layers.activation import QuickGELU
from sglang.srt.layers.attention.vision import VisionAttention
from sglang.srt.layers.linear import ColumnParallelLinear, RowParallelLinear
......@@ -61,6 +60,7 @@ from sglang.srt.model_executor.forward_batch_info import ForwardBatch
from sglang.srt.model_loader.weight_utils import default_weight_loader
from sglang.srt.models.qwen2 import Qwen2ForCausalLM
from sglang.srt.utils import add_prefix
from sglang.srt.utils.hf_transformers_utils import get_processor
logger = logging.getLogger(__name__)
......
......@@ -33,7 +33,6 @@ from einops import rearrange
from transformers import Qwen2VLConfig
from transformers.models.qwen2_vl.configuration_qwen2_vl import Qwen2VLVisionConfig
from sglang.srt.hf_transformers_utils import get_processor
from sglang.srt.layers.activation import QuickGELU
from sglang.srt.layers.attention.vision import VisionAttention
from sglang.srt.layers.linear import ColumnParallelLinear, RowParallelLinear
......@@ -50,6 +49,7 @@ from sglang.srt.model_executor.forward_batch_info import ForwardBatch
from sglang.srt.model_loader.weight_utils import default_weight_loader
from sglang.srt.models.qwen2 import Qwen2Model
from sglang.srt.utils import add_prefix
from sglang.srt.utils.hf_transformers_utils import get_processor
logger = logging.getLogger(__name__)
......
......@@ -28,7 +28,6 @@ from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import (
)
from sglang.srt.configs.qwen3_vl import Qwen3VLConfig, Qwen3VLVisionConfig
from sglang.srt.hf_transformers_utils import get_processor
from sglang.srt.layers.attention.vision import VisionAttention
from sglang.srt.layers.linear import ColumnParallelLinear, RowParallelLinear
from sglang.srt.layers.logits_processor import LogitsProcessor
......@@ -45,6 +44,7 @@ from sglang.srt.model_loader.weight_utils import default_weight_loader
from sglang.srt.models.qwen2_vl import Qwen2VLVideoInputs
from sglang.srt.models.qwen3 import Qwen3Model
from sglang.srt.utils import add_prefix
from sglang.srt.utils.hf_transformers_utils import get_processor
logger = logging.getLogger(__name__)
......
......@@ -34,7 +34,6 @@ from sglang.srt.distributed import (
get_pp_group,
get_tensor_model_parallel_rank,
)
from sglang.srt.hf_transformers_utils import get_processor
from sglang.srt.layers.logits_processor import LogitsProcessor
from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoE
from sglang.srt.layers.pooler import Pooler, PoolingType
......@@ -58,6 +57,7 @@ from sglang.srt.models.qwen3_vl import (
Qwen3VLForConditionalGeneration,
)
from sglang.srt.utils import add_prefix
from sglang.srt.utils.hf_transformers_utils import get_processor
logger = logging.getLogger(__name__)
......
......@@ -24,7 +24,6 @@ from typing import List, Literal, Optional, Union
from sglang.srt.connector import ConnectorType
from sglang.srt.function_call.function_call_parser import FunctionCallParser
from sglang.srt.hf_transformers_utils import check_gguf_file, get_config
from sglang.srt.lora.lora_registry import LoRARef
from sglang.srt.parser.reasoning_parser import ReasoningParser
from sglang.srt.utils import (
......@@ -47,6 +46,7 @@ from sglang.srt.utils import (
nullable_str,
parse_connector_type,
)
from sglang.srt.utils.hf_transformers_utils import check_gguf_file, get_config
from sglang.utils import is_in_ci
logger = logging.getLogger(__name__)
......
# Temporarily do this to avoid changing all imports in the repo
from .common import *
......@@ -33,7 +33,7 @@ async def update_weights(
"""
infer_tp_size = device_mesh[device_mesh_key].mesh.size()[0]
infer_tp_rank = device_mesh[device_mesh_key].get_local_rank()
from sglang.srt.patch_torch import monkey_patch_torch_reductions
from sglang.srt.utils.patch_torch import monkey_patch_torch_reductions
monkey_patch_torch_reductions()
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment