Unverified commit 62797440, authored by Chang Su and committed by GitHub

[Lint] Add `python/sglang` to ruff F401 checks and remove unused imports in files (#11685)

parent 2614adf9
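For context: ruff's F401 rule reports names that are imported but never used in a module. Extending the check to `python/sglang` (presumably via the project's ruff configuration, which is not part of the hunks excerpted here) has two visible outcomes in the diff below: imports that are genuinely unused are deleted, and imports that the rule would flag but that should stay (such as `FusedSetKVBufferArg`, presumably re-exported or used indirectly) are kept with a `# noqa: F401` marker that silences the rule on that line. A minimal sketch of both outcomes, using standard-library modules and a hypothetical `to_json` helper purely for illustration, not code from this commit:

import json    # referenced in to_json() below, so F401 does not flag it
import os      # never referenced anywhere: ruff reports "F401 `os` imported but unused"
import atexit  # noqa: F401  (kept on purpose for illustration; the marker suppresses F401 on this line only)


def to_json(payload: dict) -> str:
    # Referencing json here is what keeps its import "used" in F401's eyes.
    return json.dumps(payload)
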
@@ -14,8 +14,7 @@ from transformers import (
)
from transformers.models.auto.modeling_auto import AutoModel
from sglang.srt.layers.layernorm import RMSNorm
-from sglang.srt.layers.linear import ColumnParallelLinear, RowParallelLinear
+from sglang.srt.layers.linear import RowParallelLinear
from sglang.srt.layers.logits_processor import LogitsProcessor
from sglang.srt.layers.quantization.base_config import QuantizationConfig
from sglang.srt.layers.vocab_parallel_embedding import VocabParallelEmbedding
......
@@ -44,10 +44,8 @@ from sglang.srt.layers.dp_attention import (
)
from sglang.srt.layers.layernorm import RMSNorm
from sglang.srt.layers.linear import (
ColumnParallelLinear,
MergedColumnParallelLinear,
QKVParallelLinear,
ReplicatedLinear,
RowParallelLinear,
)
from sglang.srt.layers.logits_processor import LogitsProcessor
@@ -78,16 +76,12 @@ from sglang.srt.utils import (
BumpAllocator,
LazyValue,
add_prefix,
bind_or_assign,
cpu_has_amx_support,
get_bool_env_var,
get_device_sm,
get_int_env_var,
is_cpu,
is_cuda,
is_flashinfer_available,
is_hip,
is_non_idle_and_non_empty,
log_info_on_rank0,
use_intel_amx_backend,
)
......
@@ -85,7 +85,7 @@ _is_sm100_supported = is_cuda() and is_sm100_supported()
if _is_cuda:
-from sgl_kernel import FusedSetKVBufferArg
+from sgl_kernel import FusedSetKVBufferArg  # noqa: F401
class GptOssConfig(PretrainedConfig):
......
@@ -12,18 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Inference-only HunYuan model compatible with HuggingFace weights."""
import logging
import re
from dataclasses import dataclass
from enum import Enum, auto
-from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
+from typing import Any, Dict, Iterable, Optional, Tuple
import torch
from torch import nn
from transformers import PretrainedConfig
from sglang.srt.distributed import (
get_pp_group,
get_tensor_model_parallel_rank,
get_tensor_model_parallel_world_size,
tensor_model_parallel_all_reduce,
@@ -46,7 +42,6 @@ from sglang.srt.layers.radix_attention import RadixAttention
from sglang.srt.layers.rotary_embedding import get_rope
from sglang.srt.layers.sampler import Sampler
from sglang.srt.layers.vocab_parallel_embedding import (
DEFAULT_VOCAB_PADDING_SIZE,
ParallelLMHead,
VocabParallelEmbedding,
)
@@ -56,7 +51,7 @@ from sglang.srt.model_loader.weight_utils import (
kv_cache_scales_loader,
maybe_remap_kv_scale_name,
)
-from sglang.srt.utils import add_prefix, is_hip
+from sglang.srt.utils import is_hip
expert_distribution_recorder = ExpertDistributionRecorder()
......
@@ -5,7 +5,6 @@ from torch import nn
from transformers import PretrainedConfig
from sglang.srt.layers.attention import vision_utils
from sglang.srt.layers.moe.ep_moe.layer import get_moe_impl_class
from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoE
from sglang.srt.layers.quantization.base_config import QuantizationConfig
from sglang.srt.managers.mm_utils import (
......
@@ -27,7 +27,7 @@ from transformers import LlamaConfig
from sglang.srt.distributed import get_pp_group
from sglang.srt.layers.layernorm import RMSNorm
-from sglang.srt.layers.linear import QKVParallelLinear, RowParallelLinear
+from sglang.srt.layers.linear import QKVParallelLinear
from sglang.srt.layers.logits_processor import LogitsProcessor
from sglang.srt.layers.quantization.base_config import QuantizationConfig
from sglang.srt.layers.vocab_parallel_embedding import (
......
@@ -44,9 +44,7 @@ from sglang.srt.distributed import (
)
from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder
from sglang.srt.eplb.expert_location import ModelConfigForExpertLocation
from sglang.srt.eplb.expert_location_dispatch import ExpertLocationDispatchInfo
from sglang.srt.layers.activation import SiluAndMul
from sglang.srt.layers.amx_utils import PackWeightMethod
from sglang.srt.layers.communicator import LayerCommunicator, LayerScatterModes
from sglang.srt.layers.dp_attention import (
get_attention_tp_rank,
@@ -87,20 +85,15 @@ from sglang.srt.models.deepseek_v2 import DeepseekV2AttentionMLA
from sglang.srt.server_args import get_global_server_args
from sglang.srt.utils import (
BumpAllocator,
LazyValue,
add_prefix,
bind_or_assign,
cpu_has_amx_support,
get_bool_env_var,
get_device_sm,
get_int_env_var,
is_cpu,
is_cuda,
is_flashinfer_available,
is_hip,
is_non_idle_and_non_empty,
is_npu,
is_sm100_supported,
)
_is_hip = is_hip()
@@ -113,13 +106,7 @@ _is_cpu = is_cpu()
_device_sm = get_device_sm()
if _is_cuda:
-from sgl_kernel import (
-awq_dequantize,
-bmm_fp8,
-dsv3_fused_a_gemm,
-dsv3_router_gemm,
-merge_state_v2,
-)
+from sgl_kernel import awq_dequantize
elif _is_cpu and _is_cpu_amx_available:
pass
elif _is_hip:
......
@@ -32,14 +32,10 @@
import concurrent.futures
import logging
import os
from enum import IntEnum, auto
-from typing import Any, Dict, Iterable, Optional, Tuple, Union
+from typing import Iterable, Optional, Tuple
import torch
import torch.nn.functional as F
from torch import nn
from tqdm import tqdm
from sglang.srt.configs import LongcatFlashConfig
from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder
@@ -75,7 +71,6 @@ from sglang.srt.models.deepseek_v2 import DeepseekV2AttentionMLA
from sglang.srt.models.longcat_flash import LongcatFlashForCausalLM, LongcatFlashMLP
from sglang.srt.utils import (
BumpAllocator,
LazyValue,
add_prefix,
bind_or_assign,
cpu_has_amx_support,
@@ -97,13 +92,7 @@ _is_cpu = is_cpu()
_device_sm = get_device_sm()
if _is_cuda:
-from sgl_kernel import (
-awq_dequantize,
-bmm_fp8,
-dsv3_fused_a_gemm,
-dsv3_router_gemm,
-merge_state_v2,
-)
+from sgl_kernel import awq_dequantize
elif _is_cpu and _is_cpu_amx_available:
pass
elif _is_hip:
......
# Adapted from qwen2.py
from functools import partial
-from typing import Any, Dict, Iterable, Optional, Tuple
+from typing import Iterable, Optional, Tuple
import torch
from torch import nn
from sglang.srt.distributed import (
get_tensor_model_parallel_rank,
get_tensor_model_parallel_world_size,
split_tensor_along_last_dim,
tensor_model_parallel_all_gather,
)
from sglang.srt.layers.layernorm import RMSNorm
from sglang.srt.layers.linear import QKVParallelLinear, RowParallelLinear
from sglang.srt.layers.logits_processor import LogitsProcessor
from sglang.srt.layers.pooler import Pooler, PoolingType
from sglang.srt.layers.quantization.base_config import QuantizationConfig
from sglang.srt.layers.radix_attention import RadixAttention
from sglang.srt.layers.rotary_embedding import get_rope
from sglang.srt.layers.vocab_parallel_embedding import ParallelLMHead
from sglang.srt.model_executor.forward_batch_info import ForwardBatch
from sglang.srt.model_loader.weight_utils import default_weight_loader
-from sglang.srt.models.qwen2 import Qwen2DecoderLayer, Qwen2MLP, Qwen2Model
+from sglang.srt.models.qwen2 import Qwen2DecoderLayer, Qwen2Model
from sglang.srt.utils import add_prefix
MiMoConfig = None
......
# Adapted from https://github.com/vllm-project/vllm/pull/17433/files and deepseek_nextn.py
from functools import partial
-from typing import Any, Dict, Iterable, Optional, Tuple
+from typing import Iterable, Optional, Tuple
import torch
from torch import nn
......
@@ -43,7 +43,6 @@ from sglang.srt.managers.mm_utils import (
general_mm_embed_routine,
)
from sglang.srt.managers.schedule_batch import (
Modality,
MultimodalDataItem,
MultimodalInputs,
flatten_nested_list,
@@ -59,8 +58,6 @@ from sglang.srt.utils import logger
try:
from transformers import LogitsWarper
from vector_quantize_pytorch import GroupedResidualFSQ
from vocos import Vocos
from vocos.pretrained import instantiate_class
_tts_deps = True
except:
......
@@ -24,7 +24,6 @@ from torch import nn
from transformers import MixtralConfig
from sglang.srt.distributed import (
get_moe_expert_parallel_world_size,
get_pp_group,
get_tensor_model_parallel_world_size,
tensor_model_parallel_all_reduce,
......
@@ -17,7 +17,6 @@ from collections.abc import Iterable
from typing import Optional, Union
import torch
import torch.nn.functional as F
from torch import nn
from transformers import OPTConfig
@@ -26,10 +25,8 @@ from sglang.srt.distributed import (
get_tensor_model_parallel_rank,
get_tensor_model_parallel_world_size,
)
from sglang.srt.layers.activation import get_act_fn
from sglang.srt.layers.linear import (
ColumnParallelLinear,
MergedColumnParallelLinear,
QKVParallelLinear,
ReplicatedLinear,
RowParallelLinear,
@@ -38,7 +35,7 @@ from sglang.srt.layers.logits_processor import LogitsProcessor, LogitsProcessorO
from sglang.srt.layers.pooler import Pooler, PoolingType
from sglang.srt.layers.quantization.base_config import QuantizationConfig
from sglang.srt.layers.radix_attention import RadixAttention
from sglang.srt.layers.utils import PPMissingLayer, get_layer_id
from sglang.srt.layers.utils import get_layer_id
from sglang.srt.layers.vocab_parallel_embedding import (
ParallelLMHead,
VocabParallelEmbedding,
@@ -47,7 +44,6 @@ from sglang.srt.model_executor.forward_batch_info import ForwardBatch, PPProxyTe
from sglang.srt.model_loader.weight_utils import (
default_weight_loader,
kv_cache_scales_loader,
maybe_remap_kv_scale_name,
)
from sglang.srt.utils import add_prefix, make_layers
......
# Adapted from https://github.com/vllm-project/vllm/blob/main/vllm/model_executor/models/phi.py
-from typing import Iterable, Optional, Union
+from typing import Iterable, Optional
import torch
from torch import nn
......
@@ -24,7 +24,7 @@ from typing import List, Optional, Tuple
import numpy as np
import torch
from torch import nn
-from transformers import PretrainedConfig, SiglipVisionConfig
+from transformers import PretrainedConfig
from sglang.srt.layers.quantization import QuantizationConfig
from sglang.srt.managers.mm_utils import (
......
@@ -18,7 +18,6 @@ from sglang.srt.layers.pooler import Pooler, PoolingType
from sglang.srt.layers.quantization.base_config import QuantizationConfig
from sglang.srt.layers.radix_attention import RadixAttention
from sglang.srt.layers.rotary_embedding import get_rope
from sglang.srt.layers.utils import PPMissingLayer
from sglang.srt.layers.vocab_parallel_embedding import (
DEFAULT_VOCAB_PADDING_SIZE,
ParallelLMHead,
......
@@ -16,13 +16,10 @@
Using mistral-community/pixtral-12b as reference.
"""
import logging
import math
from typing import Iterable, List, Optional, Set, Tuple, Union
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import PixtralVisionConfig, PretrainedConfig
from transformers.models.pixtral.modeling_pixtral import PixtralRotaryEmbedding
from transformers.models.pixtral.modeling_pixtral import (
......
@@ -15,7 +15,6 @@
# Adapted from
# https://github.com/vllm-project/vllm/blob/c7f2cf2b7f67bce5842fedfdba508440fe257375/vllm/model_executor/models/qwen.py#L1
import time
from typing import Any, Dict, Iterable, Optional, Tuple
import torch
......
@@ -23,30 +23,18 @@
# limitations under the License.
"""Inference-only Qwen2-Audio model compatible with HuggingFace weights."""
import logging
import math
from functools import lru_cache, partial
from typing import Any, Iterable, List, Optional, Tuple, Type, TypedDict
from typing import Any, Iterable, List, Optional, Tuple
import torch
import torch.nn as nn
import torch.nn.functional as F
from einops import rearrange
-from transformers import AutoTokenizer, Qwen2AudioEncoderConfig, Qwen2Config
from transformers.activations import ACT2FN
+from transformers import Qwen2AudioEncoderConfig, Qwen2Config
from transformers.models.qwen2_audio.configuration_qwen2_audio import Qwen2AudioConfig
from transformers.models.qwen2_audio.modeling_qwen2_audio import (
Qwen2AudioEncoder,
Qwen2AudioMultiModalProjector,
)
from sglang.srt.layers.activation import QuickGELU
from sglang.srt.layers.attention.vision import VisionAttention
from sglang.srt.layers.linear import ColumnParallelLinear, RowParallelLinear
from sglang.srt.layers.logits_processor import LogitsProcessor
from sglang.srt.layers.pooler import Pooler, PoolingType
from sglang.srt.layers.quantization.base_config import QuantizationConfig
from sglang.srt.layers.utils import get_layer_id
from sglang.srt.layers.vocab_parallel_embedding import ParallelLMHead
from sglang.srt.managers.mm_utils import (
MultiModalityDataPaddingPatternMultimodalTokens,
general_mm_embed_routine,
@@ -60,7 +48,6 @@ from sglang.srt.model_executor.forward_batch_info import ForwardBatch
from sglang.srt.model_loader.weight_utils import default_weight_loader
from sglang.srt.models.qwen2 import Qwen2ForCausalLM
from sglang.srt.utils import add_prefix
from sglang.srt.utils.hf_transformers_utils import get_processor
logger = logging.getLogger(__name__)
......
import enum
import logging
-from typing import Any, Dict, Iterable, Optional, Set, Tuple
+from typing import Any, Iterable, Optional, Set, Tuple
import torch
import torch.nn.functional as F
from torch import nn
from sglang.srt.configs.qwen3_next import Qwen3NextConfig
-from sglang.srt.distributed import (
-divide,
-get_pp_group,
-get_tensor_model_parallel_rank,
-get_tensor_model_parallel_world_size,
-)
+from sglang.srt.distributed import divide, get_pp_group
from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder
from sglang.srt.eplb.expert_location import ModelConfigForExpertLocation
from sglang.srt.layers.attention.fla.layernorm_gated import RMSNorm as RMSNormGated
@@ -23,10 +17,9 @@ from sglang.srt.layers.dp_attention import (
get_attention_tp_size,
is_dp_attention_enabled,
)
-from sglang.srt.layers.layernorm import GemmaRMSNorm, RMSNorm
+from sglang.srt.layers.layernorm import GemmaRMSNorm
from sglang.srt.layers.linear import (
ColumnParallelLinear,
MergedColumnParallelLinear,
QKVParallelLinear,
RowParallelLinear,
)
......