Unverified Commit 62797440 authored by Chang Su, committed by GitHub

[Lint] Add `python/sglang` to ruff F401 checks and remove unused imports in files (#11685)

parent 2614adf9
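
Ruff's F401 rule flags names that are imported but never referenced in the module. Each hunk below either narrows an import list to the names actually used or deletes the import outright; the checks can be reproduced locally with `ruff check --select F401 python/sglang` (the repository's actual lint configuration is not shown in this diff). For illustration only — a hypothetical snippet modeled on the first hunk, not code from this commit — F401 fires like this:

```python
# Hypothetical module used only to illustrate ruff's F401 rule.
from sglang.srt.utils import get_bool_env_var, get_device_name, is_cuda

_is_cuda = is_cuda()         # is_cuda is referenced: not flagged
device = get_device_name()  # get_device_name is referenced: not flagged
# get_bool_env_var is never referenced, so ruff reports something like:
#   F401 `sglang.srt.utils.get_bool_env_var` imported but unused
```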
@@ -8,7 +8,7 @@ import torch
 import triton
 import triton.language as tl
-from sglang.srt.utils import get_bool_env_var, get_device_name, is_cuda
+from sglang.srt.utils import get_device_name, is_cuda
 _is_cuda = is_cuda()
 if _is_cuda:
...
@@ -1059,16 +1059,7 @@ class ModelOptNvFp4FusedMoEMethod(FusedMoEMethodBase):
         intermediate_size,
         num_experts,
     ):
-        from flashinfer import (
-            RoutingMethodType,
-            e2m1_and_ufp8sf_scale_to_float,
-            fp4_quantize,
-            next_positive_power_of_2,
-            nvfp4_block_scale_interleave,
-            reorder_rows_for_gated_act_gemm,
-            shuffle_matrix_a,
-            shuffle_matrix_sf_a,
-        )
+        from flashinfer import nvfp4_block_scale_interleave
         from flashinfer.fused_moe.core import (
             _maybe_get_cached_w2_permute_indices,
             _maybe_get_cached_w3_w1_permute_indices,
...
@@ -2,7 +2,7 @@
 import logging
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Dict, List, Optional
 import regex as re
 import torch
...
@@ -3,16 +3,16 @@
 from __future__ import annotations
 import logging
-from typing import TYPE_CHECKING, Any, Callable, Optional
+from typing import TYPE_CHECKING, Any
 import torch
-from aiter import ActivationType, QuantType, biased_grouped_topk
+from aiter import ActivationType, QuantType
 from aiter.fused_moe import fused_moe
 from aiter.utility.fp4_utils import e8m0_shuffle
 from sglang.srt.layers.moe import MoeRunnerConfig
 from sglang.srt.layers.quantization.base_config import FusedMoEMethodBase
-from sglang.srt.utils import get_bool_env_var, is_hip, mxfp_supported, set_weight_attrs
+from sglang.srt.utils import is_hip, set_weight_attrs
 if TYPE_CHECKING:
     from sglang.srt.layers.moe.token_dispatcher import (
...
@@ -2,20 +2,13 @@
 from typing import Any, Callable, Optional
 import aiter
 import torch
-import torch.nn.functional as F
-from aiter.ops.gemm_op_a4w4 import gemm_a4w4
-from aiter.ops.shuffle import shuffle_weight
-from aiter.ops.triton.gemm_afp4wfp4 import gemm_afp4wfp4
-from aiter.ops.triton.gemm_afp4wfp4_pre_quant_atomic import gemm_afp4wfp4_pre_quant
 from aiter.ops.triton.quant import dynamic_mxfp4_quant
-from aiter.utility import dtypes
 from aiter.utility.fp4_utils import e8m0_shuffle
 from sglang.srt.layers.parameter import GroupQuantScaleParameter, PackedvLLMParameter
 from sglang.srt.layers.quantization.quark.schemes import QuarkScheme
-from sglang.srt.utils import get_bool_env_var
 __all__ = ["QuarkW4A4MXFP4"]
...
@@ -11,7 +11,6 @@ import numpy
 import torch
 from sglang.srt.layers.quantization.fp8_kernel import scaled_fp8_quant
-from sglang.srt.utils import is_cuda
 if TYPE_CHECKING:
     from sglang.srt.layers.quantization.base_config import QuantizationConfig
...
 from __future__ import annotations
 import logging
-from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple
+from typing import TYPE_CHECKING, Any, Dict, List, Optional
 import torch
 from torch.nn import Module
 from torch.nn.parameter import Parameter
 from sglang.srt.distributed.parallel_state import get_moe_expert_parallel_world_size
-from sglang.srt.layers.linear import LinearBase, UnquantizedLinearMethod
+from sglang.srt.layers.linear import UnquantizedLinearMethod
 from sglang.srt.layers.quantization.base_config import (
     FusedMoEMethodBase,
     QuantizationConfig,
@@ -17,11 +16,11 @@ from sglang.srt.layers.quantization.base_config import (
 from sglang.srt.layers.quantization.fp8 import Fp8LinearMethod
 from sglang.srt.layers.quantization.unquant import UnquantizedLinearMethod
 from sglang.srt.layers.quantization.utils import is_layer_skipped
-from sglang.srt.utils import is_npu, set_weight_attrs
+from sglang.srt.utils import set_weight_attrs
 if TYPE_CHECKING:
     from sglang.srt.layers.moe import MoeRunnerConfig
-    from sglang.srt.layers.moe.ep_moe.layer import DeepEPMoE, EPMoE
+    from sglang.srt.layers.moe.ep_moe.layer import DeepEPMoE
     from sglang.srt.layers.moe.token_dispatcher import (
         CombineInput,
         DeepEPNormalOutput,
...
 from __future__ import annotations
 import importlib
 import sys
 from types import MappingProxyType
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Callable,
-    Dict,
-    List,
-    Mapping,
-    Optional,
-    Tuple,
-    Union,
-    cast,
-)
+from typing import TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Tuple, Union, cast
 import torch
 from torch.nn.parameter import Parameter
-from sglang.srt.distributed import (
-    get_tensor_model_parallel_rank,
-    get_tensor_model_parallel_world_size,
-)
+from sglang.srt.distributed import get_tensor_model_parallel_world_size
 from sglang.srt.layers.amx_utils import _amx_process_weight_after_loading
 from sglang.srt.layers.moe import MoeRunner, MoeRunnerBackend, MoeRunnerConfig
 from sglang.srt.layers.moe.moe_runner.triton import TritonMoeQuantInfo
...
 import logging
 import re
 from functools import lru_cache
 import torch
...
@@ -11,7 +11,6 @@ from sglang.srt.lora.triton_ops import (
 )
 from sglang.srt.lora.utils import LoRABatchInfo
 from sglang.srt.model_executor.forward_batch_info import ForwardBatch
-from sglang.srt.server_args import ServerArgs
 class TritonLoRABackend(BaseLoRABackend):
...
@@ -20,7 +20,7 @@ import logging
 import time
 from abc import ABC, abstractmethod
 from collections import OrderedDict
-from typing import Any, Dict, List, Optional, Set
+from typing import Optional, Set
 logger = logging.getLogger(__name__)
...
@@ -16,7 +16,7 @@
 # and "Punica: Multi-Tenant LoRA Serving"
 import logging
-from typing import Dict, Iterable, List, Optional, Set, Tuple
+from typing import Dict, Iterable, List, Optional
 import torch
...
@@ -14,11 +14,10 @@ limitations under the License.
 """
 import logging
-import math
 import threading
 import time
-from queue import Empty, Full, PriorityQueue, Queue
-from typing import TYPE_CHECKING, List, NamedTuple, Optional, Set, Tuple
+from queue import Empty, Full, Queue
+from typing import TYPE_CHECKING, List, NamedTuple, Optional
 import torch
@@ -41,7 +40,7 @@ from sglang.srt.layers.dp_attention import (
     get_attention_tp_size,
     is_dp_attention_enabled,
 )
-from sglang.srt.mem_cache.memory_pool import MHATokenToKVPool, MLATokenToKVPool
+from sglang.srt.mem_cache.memory_pool import MLATokenToKVPool
 logger = logging.getLogger(__name__)
...
@@ -59,11 +59,10 @@ from sglang.srt.mem_cache.allocator import (
     SWATokenToKVPoolAllocator,
 )
 from sglang.srt.mem_cache.base_prefix_cache import BasePrefixCache
-from sglang.srt.mem_cache.chunk_cache import ChunkCache, SWAChunkCache
+from sglang.srt.mem_cache.chunk_cache import SWAChunkCache
 from sglang.srt.mem_cache.common import (
     alloc_for_decode,
     alloc_for_extend,
-    alloc_token_slots,
     evict_from_tree_cache,
 )
 from sglang.srt.mem_cache.mamba_radix_cache import MambaRadixCache
@@ -76,7 +75,6 @@ from sglang.srt.sampling.sampling_batch_info import SamplingBatchInfo
 from sglang.srt.sampling.sampling_params import SamplingParams
 from sglang.srt.server_args import ServerArgs, get_global_server_args
 from sglang.srt.utils import flatten_nested_list
-from sglang.srt.utils.common import next_power_of_2
 if TYPE_CHECKING:
     from sglang.srt.configs.model_config import ModelConfig
...
@@ -3,13 +3,10 @@ from __future__ import annotations
 import logging
 import time
-from collections import defaultdict
-from typing import TYPE_CHECKING, Dict, List, Optional, Union
-import torch
+from typing import TYPE_CHECKING, List, Optional
 from sglang.srt.disaggregation.kv_events import EventPublisherFactory, KVEventBatch
 from sglang.srt.disaggregation.utils import DisaggregationMode
-from sglang.srt.managers.io_struct import TokenizedGenerateReqInput
 from sglang.srt.managers.schedule_policy import PrefillAdder
 from sglang.srt.managers.scheduler import Req, ScheduleBatch
 from sglang.srt.metrics.collector import SchedulerMetricsCollector, SchedulerStats
...
@@ -16,7 +16,6 @@
 import asyncio
 import copy
 import dataclasses
-import json
 import logging
 import math
 import os
...
 from __future__ import annotations
 import logging
 import multiprocessing as mp
-from typing import TYPE_CHECKING, Dict, List, Optional
+from typing import TYPE_CHECKING, Optional
 from sglang.srt.layers.logits_processor import LogitsProcessorOutput
 from sglang.srt.managers.schedule_batch import Req
...
@@ -92,7 +92,7 @@ class AscendPagedTokenToKVPoolAllocator(PagedTokenToKVPoolAllocator):
 )
 if num_new_pages_item < 200:
-    import sgl_kernel_npu
+    import sgl_kernel_npu  # noqa: F401
     torch.ops.npu.alloc_extend(
         prefix_lens,
...
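
The hunk above is the one place in this commit where the import is suppressed rather than removed: importing `sgl_kernel_npu` is needed only for its side effect of registering the NPU custom ops with torch, so the module name itself is never referenced and F401 would otherwise flag it. A minimal sketch of the pattern (argument list abbreviated; the real call site passes more arguments than shown):

```python
import torch

def alloc_extend_via_npu_kernel(prefix_lens: torch.Tensor, *args) -> None:
    # Importing the kernel package registers torch.ops.npu.alloc_extend
    # as a side effect; the module name is otherwise unused, so the noqa
    # comment tells ruff not to report it as an unused import.
    import sgl_kernel_npu  # noqa: F401

    torch.ops.npu.alloc_extend(prefix_lens, *args)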
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Any, List, NamedTuple, Optional, Tuple
+from typing import TYPE_CHECKING, Any, NamedTuple, Optional, Tuple
 import torch
...
 from __future__ import annotations
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, List, Tuple, Union
+from typing import TYPE_CHECKING, Tuple, Union
 if TYPE_CHECKING:
     from sglang.srt.mem_cache.radix_cache import TreeNode
...
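
Several of the files touched here, including the last hunk above, keep typing-only imports inside an `if TYPE_CHECKING:` block. This pattern plays well with F401: the import never runs at runtime, and ruff counts a name used in an annotation as a use, so it is not reported as unused. A minimal sketch (the `visit` function is hypothetical, added only for illustration):

```python
from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Imported for type annotations only; never executed at runtime.
    from sglang.srt.mem_cache.radix_cache import TreeNode

def visit(node: TreeNode) -> None:
    # With postponed annotation evaluation, "TreeNode" stays a string at
    # runtime, and ruff treats the annotation as a use, so F401 is quiet.
    ...
```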