Unverified commit 62797440, authored by Chang Su and committed by GitHub
Browse files

[Lint] Add `python/sglang` to ruff F401 checks and remove unused imports in files (#11685)

parent 2614adf9
...@@ -22,7 +22,6 @@ The radix tree data structure for managing the hybrid (full and Mamba) KV cache. ...@@ -22,7 +22,6 @@ The radix tree data structure for managing the hybrid (full and Mamba) KV cache.
import heapq import heapq
import time import time
from collections import defaultdict from collections import defaultdict
from functools import partial
from typing import TYPE_CHECKING, List, Optional, Tuple from typing import TYPE_CHECKING, List, Optional, Tuple
import torch import torch
...@@ -33,7 +32,6 @@ from sglang.srt.mem_cache.memory_pool import HybridReqToTokenPool ...@@ -33,7 +32,6 @@ from sglang.srt.mem_cache.memory_pool import HybridReqToTokenPool
from sglang.srt.mem_cache.radix_cache import ( from sglang.srt.mem_cache.radix_cache import (
RadixKey, RadixKey,
_key_match_page_size1, _key_match_page_size1,
_key_match_paged,
get_child_key, get_child_key,
) )
......
import abc import abc
import logging import logging
import threading import threading
from enum import IntEnum
from functools import wraps from functools import wraps
from typing import Optional from typing import Optional
......
import logging import logging
from collections import OrderedDict from collections import OrderedDict
from typing import Dict
import torch import torch
......
...@@ -23,7 +23,7 @@ import heapq ...@@ -23,7 +23,7 @@ import heapq
import time import time
from collections import defaultdict from collections import defaultdict
from functools import lru_cache, partial from functools import lru_cache, partial
from typing import TYPE_CHECKING, Any, Iterator, List, Optional, Tuple, Union from typing import TYPE_CHECKING, Iterator, List, Optional, Tuple, Union
import torch import torch
......
...@@ -3,20 +3,8 @@ import os ...@@ -3,20 +3,8 @@ import os
import torch import torch
import torch.distributed import torch.distributed
from aibrix_kvcache import ( from aibrix_kvcache.common.absl_logging import log_every_n_seconds
BaseKVCacheManager,
GroupAwareKVCacheManager,
KVCacheBlockLayout,
KVCacheBlockSpec,
KVCacheConfig,
KVCacheMetrics,
KVCacheTensorSpec,
ModelSpec,
TokenListView,
)
from aibrix_kvcache.common.absl_logging import getLogger, log_every_n_seconds, log_if
from aibrix_kvcache_storage import AibrixKVCacheStorage from aibrix_kvcache_storage import AibrixKVCacheStorage
from torch.distributed import Backend, ProcessGroup
from sglang.srt.mem_cache.hicache_storage import HiCacheStorageConfig from sglang.srt.mem_cache.hicache_storage import HiCacheStorageConfig
from sglang.srt.mem_cache.memory_pool import MHATokenToKVPool from sglang.srt.mem_cache.memory_pool import MHATokenToKVPool
......
...@@ -2,21 +2,18 @@ import json ...@@ -2,21 +2,18 @@ import json
import logging import logging
import os import os
import time import time
import uuid from typing import Any, List, Optional, Tuple
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple
import eic import eic
import torch import torch
import yaml import yaml
from sglang.srt.layers.dp_attention import get_attention_tp_rank, get_attention_tp_size
from sglang.srt.mem_cache.hicache_storage import ( from sglang.srt.mem_cache.hicache_storage import (
HiCacheStorage, HiCacheStorage,
HiCacheStorageConfig, HiCacheStorageConfig,
HiCacheStorageExtraInfo, HiCacheStorageExtraInfo,
) )
from sglang.srt.mem_cache.memory_pool_host import HostKVCache, MLATokenToKVPoolHost from sglang.srt.mem_cache.memory_pool_host import HostKVCache
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
......
import logging import logging
import os import os
import threading
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import List from typing import List
......
...@@ -2,7 +2,7 @@ from __future__ import annotations ...@@ -2,7 +2,7 @@ from __future__ import annotations
import logging import logging
import threading import threading
from typing import TYPE_CHECKING, List, Optional from typing import TYPE_CHECKING, Optional
import torch import torch
......
import hashlib
import logging import logging
import os import os
import time import time
import uuid import uuid
from typing import Any, Dict, List, Optional, Tuple, Union from typing import Any, List, Optional, Union
import torch import torch
......
import logging import logging
import os import os
from typing import Any, Dict, List, Optional, Tuple, Union from typing import Any, List, Optional, Tuple, Union
import torch import torch
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
import os import os
import unittest import unittest
from typing import List, Optional from typing import List
from unittest.mock import MagicMock from unittest.mock import MagicMock
import torch import torch
......
...@@ -18,7 +18,7 @@ Records the latency of some functions ...@@ -18,7 +18,7 @@ Records the latency of some functions
import asyncio import asyncio
import time import time
from functools import wraps from functools import wraps
from typing import Any, Callable, List, Optional from typing import Any, Callable, Optional
from sglang.srt.metrics.utils import exponential_buckets from sglang.srt.metrics.utils import exponential_buckets
......
...@@ -104,11 +104,7 @@ from sglang.srt.mem_cache.memory_pool import ( ...@@ -104,11 +104,7 @@ from sglang.srt.mem_cache.memory_pool import (
) )
from sglang.srt.model_executor.cpu_graph_runner import CPUGraphRunner from sglang.srt.model_executor.cpu_graph_runner import CPUGraphRunner
from sglang.srt.model_executor.cuda_graph_runner import CudaGraphRunner from sglang.srt.model_executor.cuda_graph_runner import CudaGraphRunner
from sglang.srt.model_executor.forward_batch_info import ( from sglang.srt.model_executor.forward_batch_info import ForwardBatch, PPProxyTensors
ForwardBatch,
ForwardMode,
PPProxyTensors,
)
from sglang.srt.model_executor.npu_graph_runner import NPUGraphRunner from sglang.srt.model_executor.npu_graph_runner import NPUGraphRunner
from sglang.srt.model_executor.piecewise_cuda_graph_runner import ( from sglang.srt.model_executor.piecewise_cuda_graph_runner import (
PiecewiseCudaGraphRunner, PiecewiseCudaGraphRunner,
......
...@@ -19,10 +19,9 @@ import logging ...@@ -19,10 +19,9 @@ import logging
import threading import threading
from typing import TYPE_CHECKING, Optional, Union from typing import TYPE_CHECKING, Optional, Union
import numpy as np
import torch import torch
from sglang.srt.configs.model_config import AttentionArch, is_deepseek_nsa from sglang.srt.configs.model_config import is_deepseek_nsa
from sglang.srt.model_executor.cuda_graph_runner import CudaGraphRunner from sglang.srt.model_executor.cuda_graph_runner import CudaGraphRunner
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
# limitations under the License. # limitations under the License.
"""SGLang BailingMoE model.""" """SGLang BailingMoE model."""
import logging import logging
from typing import Any, Dict, Iterable, Optional, Tuple, Union from typing import Iterable, Optional, Tuple, Union
import torch import torch
import torch.nn.functional as F import torch.nn.functional as F
...@@ -59,7 +59,6 @@ from sglang.srt.layers.moe.ep_moe.layer import get_moe_impl_class ...@@ -59,7 +59,6 @@ from sglang.srt.layers.moe.ep_moe.layer import get_moe_impl_class
from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoE from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoE
from sglang.srt.layers.moe.token_dispatcher import DeepEPDispatcher from sglang.srt.layers.moe.token_dispatcher import DeepEPDispatcher
from sglang.srt.layers.moe.topk import TopK from sglang.srt.layers.moe.topk import TopK
from sglang.srt.layers.moe.utils import DeepEPMode
from sglang.srt.layers.quantization.base_config import QuantizationConfig from sglang.srt.layers.quantization.base_config import QuantizationConfig
from sglang.srt.layers.radix_attention import RadixAttention from sglang.srt.layers.radix_attention import RadixAttention
from sglang.srt.layers.rotary_embedding import get_rope from sglang.srt.layers.rotary_embedding import get_rope
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
from typing import Any, Dict, Iterable, Optional, Set, Tuple from typing import Iterable, Optional, Set, Tuple
import torch import torch
from torch import nn from torch import nn
......
...@@ -183,9 +183,9 @@ elif _is_hip: ...@@ -183,9 +183,9 @@ elif _is_hip:
awq_dequantize_triton as awq_dequantize, awq_dequantize_triton as awq_dequantize,
) )
elif _is_npu: elif _is_npu:
import custom_ops import custom_ops # noqa: F401
import sgl_kernel_npu import sgl_kernel_npu # noqa: F401
import torch_npu import torch_npu # noqa: F401
else: else:
pass pass
......
...@@ -6,7 +6,6 @@ from typing import Iterable, List, Optional, Tuple ...@@ -6,7 +6,6 @@ from typing import Iterable, List, Optional, Tuple
import torch import torch
import torch.nn as nn import torch.nn as nn
from transformers.activations import ACT2FN
from sglang.srt.configs import DotsOCRConfig from sglang.srt.configs import DotsOCRConfig
from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.logits_processor import LogitsProcessor
...@@ -22,7 +21,6 @@ from sglang.srt.model_loader.weight_utils import default_weight_loader ...@@ -22,7 +21,6 @@ from sglang.srt.model_loader.weight_utils import default_weight_loader
from sglang.srt.models.dots_vlm_vit import DotsVisionTransformer from sglang.srt.models.dots_vlm_vit import DotsVisionTransformer
from sglang.srt.models.qwen2 import Qwen2ForCausalLM from sglang.srt.models.qwen2 import Qwen2ForCausalLM
from sglang.srt.utils import add_prefix from sglang.srt.utils import add_prefix
from sglang.srt.utils.hf_transformers_utils import get_processor
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
......
...@@ -23,7 +23,6 @@ import torch ...@@ -23,7 +23,6 @@ import torch
from torch import nn from torch import nn
from sglang.srt.configs.dots_vlm import DotsVLMConfig from sglang.srt.configs.dots_vlm import DotsVLMConfig
from sglang.srt.distributed import parallel_state
from sglang.srt.layers.quantization.base_config import QuantizationConfig from sglang.srt.layers.quantization.base_config import QuantizationConfig
from sglang.srt.managers.mm_utils import ( from sglang.srt.managers.mm_utils import (
MultiModalityDataPaddingPatternMultimodalTokens, MultiModalityDataPaddingPatternMultimodalTokens,
......
import enum
import logging import logging
from typing import Any, Iterable, List, Optional, Set, Tuple from typing import Any, Iterable, List, Optional, Set, Tuple
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment