Unverified Commit 62797440 authored by Chang Su's avatar Chang Su Committed by GitHub
Browse files

[Lint] Add `python/sglang` to ruff F401 checks and remove unused imports in files (#11685)

parent 2614adf9
......@@ -27,9 +27,9 @@ repos:
rev: v0.11.7
hooks:
- id: ruff
args: [--select=F401, --fixable=F401]
files: ^(benchmark/|docs/|examples/)
exclude: \.ipynb$|^python/sglang/srt/grpc/.*_pb2\.py$|^python/sglang/srt/grpc/.*_pb2_grpc\.py$|^python/sglang/srt/grpc/.*_pb2\.pyi$|^python/sglang/srt/grpc/.*_pb2_grpc\.pyi$
args: [--select=F401,F821, --fixable=F401]
files: ^(benchmark/|docs/|examples/|python/sglang/)
exclude: __init__\.py$|\.ipynb$|^python/sglang/srt/grpc/.*_pb2\.py$|^python/sglang/srt/grpc/.*_pb2_grpc\.py$|^python/sglang/srt/grpc/.*_pb2\.pyi$|^python/sglang/srt/grpc/.*_pb2_grpc\.pyi$
- repo: https://github.com/psf/black
rev: 24.10.0
hooks:
......
......@@ -15,7 +15,7 @@ if not is_hpu():
# ROCm does not use vllm custom allreduce
if use_vllm_custom_allreduce and not is_hip():
try:
import vllm._C
import vllm._C # noqa: F401
except ImportError as e:
logger.warning("Failed to import from vllm._C with %r", e)
else:
......
......@@ -9,7 +9,6 @@ from unittest.mock import patch
import torch
import torch.fx as fx
import sglang.srt.compilation.weak_ref_tensor_jit
from sglang.srt.compilation.compilation_config import CompilationConfig
from sglang.srt.compilation.compilation_counter import compilation_counter
......
import math
import os
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple
......
from typing import Any, List, Optional, Union
from transformers import AutoProcessor, LlamaTokenizerFast, PretrainedConfig
from transformers.feature_extraction_utils import BatchFeature
from transformers.image_utils import ImageInput
from transformers.processing_utils import ProcessingKwargs, Unpack
from transformers.tokenization_utils_base import PreTokenizedInput, TextInput
from transformers import AutoProcessor, PretrainedConfig
from transformers.processing_utils import ProcessingKwargs
try:
from transformers import Qwen2_5_VLProcessor
......
......@@ -14,17 +14,12 @@
# limitations under the License.
"""Falcon-H1 model configuration"""
import enum
from transformers.configuration_utils import PretrainedConfig
from transformers.modeling_rope_utils import rope_config_validation
from transformers.utils import logging
from sglang.srt.configs.mamba_utils import Mamba2CacheParams, Mamba2StateShape
from sglang.srt.layers.dp_attention import (
get_attention_tp_size,
get_tensor_model_parallel_world_size,
)
from sglang.srt.layers.dp_attention import get_tensor_model_parallel_world_size
logger = logging.get_logger(__name__)
......
......@@ -21,7 +21,6 @@ from transformers.modeling_rope_utils import rope_config_validation
from transformers.utils import logging
from sglang.srt.configs.mamba_utils import Mamba2CacheParams, Mamba2StateShape
from sglang.srt.distributed.utils import divide
from sglang.srt.layers.dp_attention import get_attention_tp_size
logger = logging.get_logger(__name__)
......
# SPDX-License-Identifier: Apache-2.0
import logging
from typing import Generator, List, Optional, Tuple
from typing import Generator, Optional, Tuple
from urllib.parse import urlparse
import torch
......
import logging
import os
from typing import List, Optional
from typing import List
import torch
......
......@@ -25,7 +25,7 @@ import time
from collections import deque
from dataclasses import dataclass
from http import HTTPStatus
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Type, Union
from typing import TYPE_CHECKING, List, Optional, Tuple, Type, Union
import torch
from torch.distributed import ProcessGroup
......@@ -48,10 +48,7 @@ from sglang.srt.disaggregation.utils import (
)
from sglang.srt.layers.dp_attention import get_attention_tp_size
from sglang.srt.managers.schedule_batch import FINISH_ABORT, RequestStage, ScheduleBatch
from sglang.srt.mem_cache.allocator import (
BaseTokenToKVPoolAllocator,
SWATokenToKVPoolAllocator,
)
from sglang.srt.mem_cache.allocator import BaseTokenToKVPoolAllocator
from sglang.srt.mem_cache.base_prefix_cache import BasePrefixCache
from sglang.srt.mem_cache.memory_pool import (
HybridLinearKVPool,
......@@ -61,7 +58,6 @@ from sglang.srt.mem_cache.memory_pool import (
ReqToTokenPool,
SWAKVPool,
)
from sglang.srt.model_executor.forward_batch_info import ForwardMode
from sglang.srt.utils import get_int_env_var, require_mlp_sync
from sglang.srt.utils.torch_memory_saver_adapter import TorchMemorySaverAdapter
......
......@@ -20,7 +20,6 @@ Life cycle of a request in the prefill server
from __future__ import annotations
import logging
import threading
import time
from collections import deque
from http import HTTPStatus
......@@ -54,7 +53,7 @@ from sglang.srt.mem_cache.memory_pool import (
NSATokenToKVPool,
SWAKVPool,
)
from sglang.srt.model_executor.forward_batch_info import ForwardMode, PPProxyTensors
from sglang.srt.model_executor.forward_batch_info import PPProxyTensors
from sglang.srt.utils import (
DynamicGradMode,
broadcast_pyobj,
......
......@@ -32,7 +32,7 @@ try:
ops.meta_size()
else:
# Use custom allreduce from sgl kernel (ROCM and TRT-LLM)
import sgl_kernel
import sgl_kernel # noqa: F401
custom_ar = True
except Exception:
# For CPUs
......
......@@ -4,7 +4,7 @@ import math
import os
from contextlib import contextmanager
from enum import IntEnum
from typing import Any, Callable, List, Optional, TypeVar, Union
from typing import Optional, Union
import torch
import torch.distributed as dist
......@@ -24,7 +24,7 @@ if _is_hip:
mscclpp_is_available = False
if _is_cuda:
try:
import sgl_kernel
import sgl_kernel # noqa: F401
mscclpp_is_available = True
except:
......
......@@ -9,7 +9,7 @@ from torch.distributed import ProcessGroup
from sglang.srt.distributed.device_communicators.all_reduce_utils import (
SYMM_MEM_ALL_REDUCE_MAX_SIZES,
)
from sglang.srt.utils import get_device_capability, is_cuda, is_hip
from sglang.srt.utils import is_cuda, is_hip
try:
import torch.distributed._symmetric_memory as torch_symm_mem
......
import base64
import os
import pickle
import time
from pathlib import Path
......
# SPDX-License-Identifier: Apache-2.0
# Copied from vLLM
import json
import logging
from abc import ABC, abstractmethod
from typing import Union
......
......@@ -3,7 +3,6 @@
# Adapted from vLLM: https://github.com/vllm-project/vllm/blob/1b9902806915040ac9b3029f2ab7522ec505afc3/vllm/entrypoints/harmony_utils.py
# Slight differences in processing chat messages
import datetime
import json
from collections.abc import Iterable
from typing import Literal, Optional, Union
......
......@@ -19,7 +19,6 @@ This file implements HTTP APIs for the inference engine via fastapi.
import asyncio
import dataclasses
import json
import logging
import multiprocessing as multiprocessing
import os
......
import copy
import dataclasses
import multiprocessing
import pickle
import threading
import time
from typing import Any, Dict, List, Optional, Tuple, Union
from typing import List, Optional, Tuple
import pybase64
import requests
import torch
import torch.distributed as dist
from sglang.srt.entrypoints.EngineBase import EngineBase
from sglang.srt.entrypoints.http_server import launch_server
......
......@@ -3,8 +3,6 @@ from typing import Tuple
import torch
from sglang.srt.utils import get_bool_env_var
def balanced_packing(
weight: torch.Tensor, num_packs: int
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment