Unverified commit 62797440 authored by Chang Su, committed by GitHub

[Lint] Add `python/sglang` to ruff F401 checks and remove unused imports in files (#11685)
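Ruff's F401 rule flags names that are imported but never referenced in the module. Almost every hit below is fixed by simply deleting the name; the one intentional exception is a side-effect import, which is kept and annotated with `# noqa: F401` instead (see the last allocator hunk). A minimal illustration of what the rule catches, using a hypothetical module written only for this example:

```python
import json
import os  # F401: `os` is imported but never used; the fix is to delete this line

from typing import Callable, Dict  # F401 also flags `Callable` if only `Dict` is used


def dump(payload: Dict[str, int]) -> str:
    # Only `json` and `Dict` are referenced, so `os` and `Callable`
    # above are dead imports that `ruff check --select F401` reports.
    return json.dumps(payload)
```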

Parent: 2614adf9
```diff
@@ -8,7 +8,7 @@ import torch
 import triton
 import triton.language as tl
 
-from sglang.srt.utils import get_bool_env_var, get_device_name, is_cuda
+from sglang.srt.utils import get_device_name, is_cuda
 
 _is_cuda = is_cuda()
 if _is_cuda:
```
```diff
@@ -1059,16 +1059,7 @@ class ModelOptNvFp4FusedMoEMethod(FusedMoEMethodBase):
         intermediate_size,
         num_experts,
     ):
-        from flashinfer import (
-            RoutingMethodType,
-            e2m1_and_ufp8sf_scale_to_float,
-            fp4_quantize,
-            next_positive_power_of_2,
-            nvfp4_block_scale_interleave,
-            reorder_rows_for_gated_act_gemm,
-            shuffle_matrix_a,
-            shuffle_matrix_sf_a,
-        )
+        from flashinfer import nvfp4_block_scale_interleave
         from flashinfer.fused_moe.core import (
             _maybe_get_cached_w2_permute_indices,
             _maybe_get_cached_w3_w1_permute_indices,
```
```diff
@@ -2,7 +2,7 @@
 import logging
 
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Dict, List, Optional
 
 import regex as re
 import torch
```
```diff
@@ -3,16 +3,16 @@
 from __future__ import annotations
 
 import logging
-from typing import TYPE_CHECKING, Any, Callable, Optional
+from typing import TYPE_CHECKING, Any
 
 import torch
-from aiter import ActivationType, QuantType, biased_grouped_topk
+from aiter import ActivationType, QuantType
 from aiter.fused_moe import fused_moe
 from aiter.utility.fp4_utils import e8m0_shuffle
 
 from sglang.srt.layers.moe import MoeRunnerConfig
 from sglang.srt.layers.quantization.base_config import FusedMoEMethodBase
-from sglang.srt.utils import get_bool_env_var, is_hip, mxfp_supported, set_weight_attrs
+from sglang.srt.utils import is_hip, set_weight_attrs
 
 if TYPE_CHECKING:
     from sglang.srt.layers.moe.token_dispatcher import (
```
```diff
@@ -2,20 +2,13 @@
 from typing import Any, Callable, Optional
 
-import aiter
 import torch
-import torch.nn.functional as F
-from aiter.ops.gemm_op_a4w4 import gemm_a4w4
-from aiter.ops.shuffle import shuffle_weight
 from aiter.ops.triton.gemm_afp4wfp4 import gemm_afp4wfp4
 from aiter.ops.triton.gemm_afp4wfp4_pre_quant_atomic import gemm_afp4wfp4_pre_quant
 from aiter.ops.triton.quant import dynamic_mxfp4_quant
-from aiter.utility import dtypes
-from aiter.utility.fp4_utils import e8m0_shuffle
 
 from sglang.srt.layers.parameter import GroupQuantScaleParameter, PackedvLLMParameter
 from sglang.srt.layers.quantization.quark.schemes import QuarkScheme
-from sglang.srt.utils import get_bool_env_var
 
 __all__ = ["QuarkW4A4MXFP4"]
```
```diff
@@ -11,7 +11,6 @@ import numpy
 import torch
 
 from sglang.srt.layers.quantization.fp8_kernel import scaled_fp8_quant
-from sglang.srt.utils import is_cuda
 
 if TYPE_CHECKING:
     from sglang.srt.layers.quantization.base_config import QuantizationConfig
```
```diff
 from __future__ import annotations
 
 import logging
-from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple
+from typing import TYPE_CHECKING, Any, Dict, List, Optional
 
 import torch
 from torch.nn import Module
 from torch.nn.parameter import Parameter
 
-from sglang.srt.distributed.parallel_state import get_moe_expert_parallel_world_size
-from sglang.srt.layers.linear import LinearBase, UnquantizedLinearMethod
+from sglang.srt.layers.linear import UnquantizedLinearMethod
 from sglang.srt.layers.quantization.base_config import (
     FusedMoEMethodBase,
     QuantizationConfig,
@@ -17,11 +16,11 @@ from sglang.srt.layers.quantization.base_config import (
 from sglang.srt.layers.quantization.fp8 import Fp8LinearMethod
 from sglang.srt.layers.quantization.unquant import UnquantizedLinearMethod
 from sglang.srt.layers.quantization.utils import is_layer_skipped
-from sglang.srt.utils import is_npu, set_weight_attrs
+from sglang.srt.utils import set_weight_attrs
 
 if TYPE_CHECKING:
     from sglang.srt.layers.moe import MoeRunnerConfig
-    from sglang.srt.layers.moe.ep_moe.layer import DeepEPMoE, EPMoE
+    from sglang.srt.layers.moe.ep_moe.layer import DeepEPMoE
     from sglang.srt.layers.moe.token_dispatcher import (
         CombineInput,
         DeepEPNormalOutput,
```
```diff
 from __future__ import annotations
 
-import importlib
-import sys
 from types import MappingProxyType
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Callable,
-    Dict,
-    List,
-    Mapping,
-    Optional,
-    Tuple,
-    Union,
-    cast,
-)
+from typing import TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Tuple, Union, cast
 
 import torch
 from torch.nn.parameter import Parameter
 
-from sglang.srt.distributed import (
-    get_tensor_model_parallel_rank,
-    get_tensor_model_parallel_world_size,
-)
+from sglang.srt.distributed import get_tensor_model_parallel_world_size
 from sglang.srt.layers.amx_utils import _amx_process_weight_after_loading
 from sglang.srt.layers.moe import MoeRunner, MoeRunnerBackend, MoeRunnerConfig
 from sglang.srt.layers.moe.moe_runner.triton import TritonMoeQuantInfo
```
```diff
 import logging
 import re
-from functools import lru_cache
 
 import torch
```
```diff
@@ -11,7 +11,6 @@ from sglang.srt.lora.triton_ops import (
 )
 from sglang.srt.lora.utils import LoRABatchInfo
 from sglang.srt.model_executor.forward_batch_info import ForwardBatch
-from sglang.srt.server_args import ServerArgs
 
 
 class TritonLoRABackend(BaseLoRABackend):
```
```diff
@@ -20,7 +20,7 @@ import logging
 import time
 from abc import ABC, abstractmethod
 from collections import OrderedDict
-from typing import Any, Dict, List, Optional, Set
+from typing import Optional, Set
 
 logger = logging.getLogger(__name__)
```
```diff
@@ -16,7 +16,7 @@
 # and "Punica: Multi-Tenant LoRA Serving"
 
 import logging
-from typing import Dict, Iterable, List, Optional, Set, Tuple
+from typing import Dict, Iterable, List, Optional
 
 import torch
```
```diff
@@ -14,11 +14,10 @@ limitations under the License.
 """
 
 import logging
-import math
 import threading
 import time
-from queue import Empty, Full, PriorityQueue, Queue
-from typing import TYPE_CHECKING, List, NamedTuple, Optional, Set, Tuple
+from queue import Empty, Full, Queue
+from typing import TYPE_CHECKING, List, NamedTuple, Optional
 
 import torch
@@ -41,7 +40,7 @@ from sglang.srt.layers.dp_attention import (
     get_attention_tp_size,
     is_dp_attention_enabled,
 )
-from sglang.srt.mem_cache.memory_pool import MHATokenToKVPool, MLATokenToKVPool
+from sglang.srt.mem_cache.memory_pool import MLATokenToKVPool
 
 logger = logging.getLogger(__name__)
```
```diff
@@ -59,11 +59,10 @@ from sglang.srt.mem_cache.allocator import (
     SWATokenToKVPoolAllocator,
 )
 from sglang.srt.mem_cache.base_prefix_cache import BasePrefixCache
-from sglang.srt.mem_cache.chunk_cache import ChunkCache, SWAChunkCache
+from sglang.srt.mem_cache.chunk_cache import SWAChunkCache
 from sglang.srt.mem_cache.common import (
     alloc_for_decode,
     alloc_for_extend,
-    alloc_token_slots,
     evict_from_tree_cache,
 )
 from sglang.srt.mem_cache.mamba_radix_cache import MambaRadixCache
@@ -76,7 +75,6 @@ from sglang.srt.sampling.sampling_batch_info import SamplingBatchInfo
 from sglang.srt.sampling.sampling_params import SamplingParams
 from sglang.srt.server_args import ServerArgs, get_global_server_args
 from sglang.srt.utils import flatten_nested_list
-from sglang.srt.utils.common import next_power_of_2
 
 if TYPE_CHECKING:
     from sglang.srt.configs.model_config import ModelConfig
```
```diff
@@ -3,13 +3,10 @@ from __future__ import annotations
 import logging
 import time
 from collections import defaultdict
-from typing import TYPE_CHECKING, Dict, List, Optional, Union
+from typing import TYPE_CHECKING, List, Optional
 
-import torch
-
 from sglang.srt.disaggregation.kv_events import EventPublisherFactory, KVEventBatch
 from sglang.srt.disaggregation.utils import DisaggregationMode
-from sglang.srt.managers.io_struct import TokenizedGenerateReqInput
 from sglang.srt.managers.schedule_policy import PrefillAdder
 from sglang.srt.managers.scheduler import Req, ScheduleBatch
 from sglang.srt.metrics.collector import SchedulerMetricsCollector, SchedulerStats
```
```diff
@@ -16,7 +16,6 @@
 import asyncio
 import copy
 import dataclasses
-import json
 import logging
 import math
 import os
```
```diff
 from __future__ import annotations
 
 import logging
-import multiprocessing as mp
-from typing import TYPE_CHECKING, Dict, List, Optional
+from typing import TYPE_CHECKING, Optional
 
 from sglang.srt.layers.logits_processor import LogitsProcessorOutput
 from sglang.srt.managers.schedule_batch import Req
```
```diff
@@ -92,7 +92,7 @@ class AscendPagedTokenToKVPoolAllocator(PagedTokenToKVPoolAllocator):
         )
         if num_new_pages_item < 200:
-            import sgl_kernel_npu
+            import sgl_kernel_npu  # noqa: F401
 
             torch.ops.npu.alloc_extend(
                 prefix_lens,
```
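This hunk is the one place where the import is kept rather than deleted: `sgl_kernel_npu` is never referenced by name, but importing it appears to register the `torch.ops.npu` custom operators that the `alloc_extend` call immediately below depends on, so the commit silences F401 with `# noqa` instead of removing the line. The general side-effect-import pattern, sketched with a hypothetical extension module:

```python
# Sketch of a side-effect import; the module `my_kernel_ext` and the op
# `torch.ops.my_namespace.my_op` are hypothetical, for illustration only.
# Importing the extension runs its op-registration code; the name itself
# is never used, so `# noqa: F401` tells ruff the import is intentional.
import my_kernel_ext  # noqa: F401

import torch

# This op is callable only because the import above registered it.
out = torch.ops.my_namespace.my_op(torch.ones(4))
```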
```diff
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Any, List, NamedTuple, Optional, Tuple
+from typing import TYPE_CHECKING, Any, NamedTuple, Optional, Tuple
 
 import torch
```
```diff
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, List, Tuple, Union
+from typing import TYPE_CHECKING, Tuple, Union
 
 if TYPE_CHECKING:
     from sglang.srt.mem_cache.radix_cache import TreeNode
```