[Lint] Add `python/sglang` to ruff F401 checks and remove unused imports in files (#11685)

62797440 · Chang Su · GitHub · 2614adf9
Unverified commit 62797440, authored Oct 17, 2025 by Chang Su; committed by GitHub Oct 17, 2025
20 changed files
--- a/python/sglang/srt/mem_cache/mamba_radix_cache.py
+++ b/python/sglang/srt/mem_cache/mamba_radix_cache.py
@@ -22,7 +22,6 @@ The radix tree data structure for managing the hybrid (full and Mamba) KV cache.
 import heapq
 import time
 from collections import defaultdict
-from functools import partial
 from typing import TYPE_CHECKING, List, Optional, Tuple
 import torch
@@ -33,7 +32,6 @@ from sglang.srt.mem_cache.memory_pool import HybridReqToTokenPool
 from sglang.srt.mem_cache.radix_cache import (
    RadixKey,
    _key_match_page_size1,
-    _key_match_paged,
    get_child_key,
 )

--- a/python/sglang/srt/mem_cache/memory_pool_host.py
+++ b/python/sglang/srt/mem_cache/memory_pool_host.py
 import abc
 import logging
 import threading
-from enum import IntEnum
 from functools import wraps
 from typing import Optional

--- a/python/sglang/srt/mem_cache/multimodal_cache.py
+++ b/python/sglang/srt/mem_cache/multimodal_cache.py
 import logging
 from collections import OrderedDict
-from typing import Dict
 import torch

--- a/python/sglang/srt/mem_cache/radix_cache.py
+++ b/python/sglang/srt/mem_cache/radix_cache.py
@@ -23,7 +23,7 @@ import heapq
 import time
 from collections import defaultdict
 from functools import lru_cache, partial
-from typing import TYPE_CHECKING, Any, Iterator, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Iterator, List, Optional, Tuple, Union
 import torch

--- a/python/sglang/srt/mem_cache/storage/aibrix_kvcache/unit_test.py
+++ b/python/sglang/srt/mem_cache/storage/aibrix_kvcache/unit_test.py
@@ -3,20 +3,8 @@ import os
 import torch
 import torch.distributed
-from aibrix_kvcache import (
+from aibrix_kvcache.common.absl_logging import log_every_n_seconds
-    BaseKVCacheManager,
-    GroupAwareKVCacheManager,
-    KVCacheBlockLayout,
-    KVCacheBlockSpec,
-    KVCacheConfig,
-    KVCacheMetrics,
-    KVCacheTensorSpec,
-    ModelSpec,
-    TokenListView,
-)
-from aibrix_kvcache.common.absl_logging import getLogger, log_every_n_seconds, log_if
 from aibrix_kvcache_storage import AibrixKVCacheStorage
-from torch.distributed import Backend, ProcessGroup
 from sglang.srt.mem_cache.hicache_storage import HiCacheStorageConfig
 from sglang.srt.mem_cache.memory_pool import MHATokenToKVPool

--- a/python/sglang/srt/mem_cache/storage/eic/eic_storage.py
+++ b/python/sglang/srt/mem_cache/storage/eic/eic_storage.py
@@ -2,21 +2,18 @@ import json
 import logging
 import os
 import time
-import uuid
+from typing import Any, List, Optional, Tuple
-from dataclasses import dataclass
-from typing import Any, Dict, List, Optional, Tuple
 import eic
 import torch
 import yaml
-from sglang.srt.layers.dp_attention import get_attention_tp_rank, get_attention_tp_size
 from sglang.srt.mem_cache.hicache_storage import (
    HiCacheStorage,
    HiCacheStorageConfig,
    HiCacheStorageExtraInfo,
 )
-from sglang.srt.mem_cache.memory_pool_host import HostKVCache, MLATokenToKVPoolHost
+from sglang.srt.mem_cache.memory_pool_host import HostKVCache
 logger = logging.getLogger(__name__)

--- a/python/sglang/srt/mem_cache/storage/hf3fs/hf3fs_client.py
+++ b/python/sglang/srt/mem_cache/storage/hf3fs/hf3fs_client.py
 import logging
 import os
-import threading
 from abc import ABC, abstractmethod
 from typing import List

--- a/python/sglang/srt/mem_cache/storage/lmcache/lmc_radix_cache.py
+++ b/python/sglang/srt/mem_cache/storage/lmcache/lmc_radix_cache.py
@@ -2,7 +2,7 @@ from __future__ import annotations
 import logging
 import threading
-from typing import TYPE_CHECKING, List, Optional
+from typing import TYPE_CHECKING, Optional
 import torch

--- a/python/sglang/srt/mem_cache/storage/nixl/hicache_nixl.py
+++ b/python/sglang/srt/mem_cache/storage/nixl/hicache_nixl.py
-import hashlib
 import logging
 import os
 import time
 import uuid
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, List, Optional, Union
 import torch

--- a/python/sglang/srt/mem_cache/storage/nixl/nixl_utils.py
+++ b/python/sglang/srt/mem_cache/storage/nixl/nixl_utils.py
 import logging
 import os
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, List, Optional, Tuple, Union
 import torch

--- a/python/sglang/srt/mem_cache/storage/nixl/test_hicache_nixl_storage.py
+++ b/python/sglang/srt/mem_cache/storage/nixl/test_hicache_nixl_storage.py
@@ -2,7 +2,7 @@
 import os
 import unittest
-from typing import List, Optional
+from typing import List
 from unittest.mock import MagicMock
 import torch

--- a/python/sglang/srt/metrics/func_timer.py
+++ b/python/sglang/srt/metrics/func_timer.py
@@ -18,7 +18,7 @@ Records the latency of some functions
 import asyncio
 import time
 from functools import wraps
-from typing import Any, Callable, List, Optional
+from typing import Any, Callable, Optional
 from sglang.srt.metrics.utils import exponential_buckets

--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -104,11 +104,7 @@ from sglang.srt.mem_cache.memory_pool import (
 )
 from sglang.srt.model_executor.cpu_graph_runner import CPUGraphRunner
 from sglang.srt.model_executor.cuda_graph_runner import CudaGraphRunner
-from sglang.srt.model_executor.forward_batch_info import (
+from sglang.srt.model_executor.forward_batch_info import ForwardBatch, PPProxyTensors
-    ForwardBatch,
-    ForwardMode,
-    PPProxyTensors,
-)
 from sglang.srt.model_executor.npu_graph_runner import NPUGraphRunner
 from sglang.srt.model_executor.piecewise_cuda_graph_runner import (
    PiecewiseCudaGraphRunner,

--- a/python/sglang/srt/model_executor/npu_graph_runner.py
+++ b/python/sglang/srt/model_executor/npu_graph_runner.py
@@ -19,10 +19,9 @@ import logging
 import threading
 from typing import TYPE_CHECKING, Optional, Union
-import numpy as np
 import torch
-from sglang.srt.configs.model_config import AttentionArch, is_deepseek_nsa
+from sglang.srt.configs.model_config import is_deepseek_nsa
 from sglang.srt.model_executor.cuda_graph_runner import CudaGraphRunner
 logger = logging.getLogger(__name__)

--- a/python/sglang/srt/models/bailing_moe.py
+++ b/python/sglang/srt/models/bailing_moe.py
@@ -19,7 +19,7 @@
 # limitations under the License.
 """SGLang BailingMoE model."""
 import logging
-from typing import Any, Dict, Iterable, Optional, Tuple, Union
+from typing import Iterable, Optional, Tuple, Union
 import torch
 import torch.nn.functional as F
@@ -59,7 +59,6 @@ from sglang.srt.layers.moe.ep_moe.layer import get_moe_impl_class
 from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoE
 from sglang.srt.layers.moe.token_dispatcher import DeepEPDispatcher
 from sglang.srt.layers.moe.topk import TopK
-from sglang.srt.layers.moe.utils import DeepEPMode
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
 from sglang.srt.layers.radix_attention import RadixAttention
 from sglang.srt.layers.rotary_embedding import get_rope

--- a/python/sglang/srt/models/bert.py
+++ b/python/sglang/srt/models/bert.py
 # SPDX-License-Identifier: Apache-2.0
-from typing import Any, Dict, Iterable, Optional, Set, Tuple
+from typing import Iterable, Optional, Set, Tuple
 import torch
 from torch import nn

--- a/python/sglang/srt/models/deepseek_v2.py
+++ b/python/sglang/srt/models/deepseek_v2.py
@@ -183,9 +183,9 @@ elif _is_hip:
        awq_dequantize_triton as awq_dequantize,
    )
 elif _is_npu:
-    import custom_ops
+    import custom_ops  # noqa: F401
-    import sgl_kernel_npu
+    import sgl_kernel_npu  # noqa: F401
-    import torch_npu
+    import torch_npu  # noqa: F401
 else:
    pass

--- a/python/sglang/srt/models/dots_ocr.py
+++ b/python/sglang/srt/models/dots_ocr.py
@@ -6,7 +6,6 @@ from typing import Iterable, List, Optional, Tuple
 import torch
 import torch.nn as nn
-from transformers.activations import ACT2FN
 from sglang.srt.configs import DotsOCRConfig
 from sglang.srt.layers.logits_processor import LogitsProcessor
@@ -22,7 +21,6 @@ from sglang.srt.model_loader.weight_utils import default_weight_loader
 from sglang.srt.models.dots_vlm_vit import DotsVisionTransformer
 from sglang.srt.models.qwen2 import Qwen2ForCausalLM
 from sglang.srt.utils import add_prefix
-from sglang.srt.utils.hf_transformers_utils import get_processor
 logger = logging.getLogger(__name__)

--- a/python/sglang/srt/models/dots_vlm.py
+++ b/python/sglang/srt/models/dots_vlm.py
@@ -23,7 +23,6 @@ import torch
 from torch import nn
 from sglang.srt.configs.dots_vlm import DotsVLMConfig
-from sglang.srt.distributed import parallel_state
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
 from sglang.srt.managers.mm_utils import (
    MultiModalityDataPaddingPatternMultimodalTokens,

--- a/python/sglang/srt/models/falcon_h1.py
+++ b/python/sglang/srt/models/falcon_h1.py
-import enum
 import logging
 from typing import Any, Iterable, List, Optional, Set, Tuple