sglang · Commit 62797440 (unverified)
Authored Oct 17, 2025 by Chang Su; committed via GitHub on Oct 17, 2025
Parent: 2614adf9

[Lint] Add `python/sglang` to ruff F401 checks and remove unused imports in files (#11685)
Changes: 150 files in the full commit. Showing 20 changed files with 23 additions and 45 deletions (+23 -45) on this page.
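The diffs below fall into two buckets that the newly enabled F401 (imported-but-unused) rule has to distinguish: imports whose names are never referenced are deleted outright, while imports kept for their import-time side effects or as availability probes (e.g. `custom_ops` in nsa_indexer.py below) get an inline `# noqa: F401` so ruff skips that line. A minimal sketch of the two cases, using placeholder modules rather than sglang code:

```python
# Sketch only: importlib.metadata stands in for a module that is imported
# purely for its import-time side effects (registering ops/kernels).
# Its name is never referenced below, so ruff would report F401 without
# the inline suppression.
import importlib.metadata  # noqa: F401

# A genuinely unused import is simply removed instead, e.g.
# import sys   <- dropped when `sys` is never referenced in the module.
```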
python/sglang/srt/layers/attention/nsa/nsa_indexer.py  +2 -2
python/sglang/srt/layers/attention/nsa_backend.py  +3 -4
python/sglang/srt/layers/layernorm.py  +1 -1
python/sglang/srt/layers/moe/cutlass_moe.py  +0 -2
python/sglang/srt/layers/moe/cutlass_w4a8_moe.py  +0 -1
python/sglang/srt/layers/moe/ep_moe/kernels.py  +1 -4
python/sglang/srt/layers/moe/flashinfer_cutedsl_moe.py  +1 -1
python/sglang/srt/layers/moe/fused_moe_triton/layer.py  +1 -7
python/sglang/srt/layers/moe/moe_runner/triton.py  +3 -1
python/sglang/srt/layers/moe/rocm_moe_utils.py  +0 -1
python/sglang/srt/layers/moe/token_dispatcher/deepep.py  +1 -1
python/sglang/srt/layers/moe/token_dispatcher/mooncake.py  +1 -1
python/sglang/srt/layers/quantization/awq.py  +1 -1
python/sglang/srt/layers/quantization/base_config.py  +0 -1
python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py  +3 -10
python/sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py  +1 -1
python/sglang/srt/layers/quantization/deep_gemm_wrapper/entrypoint.py  +2 -2
python/sglang/srt/layers/quantization/fp8_kernel.py  +1 -1
python/sglang/srt/layers/quantization/fpgemm_fp8.py  +1 -2
python/sglang/srt/layers/quantization/gptq.py  +0 -1
python/sglang/srt/layers/attention/nsa/nsa_indexer.py

from __future__ import annotations
from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple
+from typing import TYPE_CHECKING, Any, Dict, Optional
import torch
import torch.nn.functional as F
...

@@ -547,7 +547,7 @@ class Indexer(CustomOp):
        forward_batch: ForwardBatch,
        layer_id: int,
    ) -> torch.Tensor:
-        import custom_ops
+        import custom_ops  # noqa: F401
        import torch_npu
        from sglang.srt.layers.dp_attention import (
...
python/sglang/srt/layers/attention/nsa_backend.py

from __future__ import annotations
-import sys
from dataclasses import dataclass
from typing import TYPE_CHECKING, Dict, List, Literal, Optional, TypeAlias
...

@@ -34,18 +33,18 @@ _is_hip = is_hip()
if _is_hip:
    try:
-        from aiter import (
+        from aiter import (  # noqa: F401
            flash_attn_varlen_func,
            mha_batch_prefill_func,
            paged_attention_ragged,
        )
-        from aiter.mla import mla_decode_fwd, mla_prefill_fwd
+        from aiter.mla import mla_decode_fwd, mla_prefill_fwd  # noqa: F401
    except ImportError:
        print(
            "aiter is AMD specific kernel library. Please make sure aiter is installed on your AMD device."
        )
else:
-    from sgl_kernel.flash_attn import flash_attn_varlen_func, flash_attn_with_kvcache
+    from sgl_kernel.flash_attn import flash_attn_with_kvcache

@dataclass(frozen=True)
...
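The aiter block above shows the availability-probe variant of this pattern: the import both provides names and tells sglang whether the optional AMD backend is installed, so it stays even where the names look unused to ruff. A self-contained sketch of that probe, with a placeholder package name:

```python
# Sketch only: "some_optional_backend" is a placeholder, not a real dependency.
try:
    # The import itself is the probe; the suppression keeps ruff's F401 rule
    # quiet when the imported name is not referenced on this code path.
    import some_optional_backend  # noqa: F401

    BACKEND_AVAILABLE = True
except ImportError:
    BACKEND_AVAILABLE = False

print(f"optional backend available: {BACKEND_AVAILABLE}")
```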
python/sglang/srt/layers/layernorm.py

...
@@ -372,4 +372,4 @@ if not (
    logger.info(
        "sgl-kernel layernorm implementation is not available on current platform. Fallback to other kernel libraries."
    )
-    from vllm.model_executor.layers.layernorm import GemmaRMSNorm, RMSNorm
+    from vllm.model_executor.layers.layernorm import GemmaRMSNorm, RMSNorm  # noqa: F401
python/sglang/srt/layers/moe/cutlass_moe.py

...
@@ -116,8 +116,6 @@ def cutlass_fused_experts_fp8(
    if is_cuda:
        from sglang.srt.layers.quantization.fp8_kernel import (
            per_group_transpose,
            per_token_group_quant_fp8_hopper_moe_mn_major,
            sglang_per_token_group_quant_fp8,
        )
...
python/sglang/srt/layers/moe/cutlass_w4a8_moe.py

# SPDX-License-Identifier: Apache-2.0
"""Cutlass W4A8 MoE kernel."""
import logging
from typing import Optional
import torch
...
python/sglang/srt/layers/moe/ep_moe/kernels.py

import logging
from typing import List, Optional
import torch
import triton
-from sglang.srt.layers.quantization.fp8_kernel import per_token_group_quant_fp8
-from sglang.srt.utils import ceil_div, dispose_tensor, is_cuda
-from sglang.utils import is_in_ci
+from sglang.srt.utils import ceil_div, is_cuda
logger = logging.getLogger(__name__)
...
python/sglang/srt/layers/moe/flashinfer_cutedsl_moe.py

-from typing import Any, Dict, Optional, Union
+from typing import Optional, Union
import torch
from flashinfer.cute_dsl.blockscaled_gemm import grouped_gemm_nt_masked
...
python/sglang/srt/layers/moe/fused_moe_triton/layer.py

...
@@ -43,13 +43,7 @@ from sglang.srt.utils import (
)
if is_flashinfer_available():
-    from flashinfer import (
-        RoutingMethodType,
-        fp4_quantize,
-        reorder_rows_for_gated_act_gemm,
-        shuffle_matrix_a,
-        shuffle_matrix_sf_a,
-    )
+    from flashinfer import RoutingMethodType, fp4_quantize
_is_hip = is_hip()
_is_cpu_amx_available = cpu_has_amx_support()
...
python/sglang/srt/layers/moe/moe_runner/triton.py

...
@@ -51,7 +51,9 @@ elif _is_hip:
if _is_cuda or _is_hip:
-    from sgl_kernel import moe_align_block_size as sgl_moe_align_block_size
+    from sgl_kernel import (  # noqa: F401
+        moe_align_block_size as sgl_moe_align_block_size,
+    )

@dataclass
...
python/sglang/srt/layers/moe/rocm_moe_utils.py

...
@@ -2,7 +2,6 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from enum import IntEnum
from functools import cache
from typing import Optional
import torch
...
python/sglang/srt/layers/moe/token_dispatcher/deepep.py

...
@@ -3,7 +3,7 @@ from __future__ import annotations
import logging
from contextlib import nullcontext
from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any, Dict, List, NamedTuple, Optional, Tuple, Union
+from typing import TYPE_CHECKING, List, NamedTuple, Optional, Tuple, Union
from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder
from sglang.srt.layers.moe.token_dispatcher.base import (
...
python/sglang/srt/layers/moe/token_dispatcher/mooncake.py

...
@@ -22,7 +22,7 @@ try:
except ImportError:
    use_mooncake_ep = False
-from enum import Enum, IntEnum, auto
+from enum import Enum, auto
import torch
import torch.distributed as dist
...
python/sglang/srt/layers/quantization/awq.py

...
@@ -3,7 +3,7 @@ from __future__ import annotations
import logging
import warnings
-from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional
+from typing import TYPE_CHECKING, Any, Dict, List, Optional
import torch
...
python/sglang/srt/layers/quantization/base_config.py

...
@@ -3,7 +3,6 @@ from __future__ import annotations
import inspect
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Type
import torch
...
python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py

...
@@ -5,7 +5,7 @@ from __future__ import annotations
import enum
import logging
from enum import Enum
-from typing import TYPE_CHECKING, List, Optional
+from typing import TYPE_CHECKING, List
import torch
from compressed_tensors import CompressionFormat
...

@@ -21,14 +21,7 @@ from sglang.srt.layers.quantization.utils import (
    per_tensor_dequantize,
    replace_parameter,
)
-from sglang.srt.utils import (
-    get_bool_env_var,
-    is_cpu,
-    is_cuda,
-    is_hip,
-    is_npu,
-    set_weight_attrs,
-)
+from sglang.srt.utils import get_bool_env_var, is_hip, set_weight_attrs
if TYPE_CHECKING:
    from sglang.srt.layers.moe.fused_moe_triton import FusedMoE
...

@@ -49,7 +42,7 @@ if _use_aiter:
    from sglang.srt.layers.moe.rocm_moe_utils import rocm_fused_experts_tkw1
try:
-    import vllm
+    import vllm  # noqa: F401
    VLLM_AVAILABLE = True
except ImportError:
...
python/sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py

...
@@ -12,7 +12,7 @@ def _compute_enable_deep_gemm():
        return False
    try:
-        import deep_gemm
+        import deep_gemm  # noqa: F401
    except ImportError:
        return False
...
python/sglang/srt/layers/quantization/deep_gemm_wrapper/entrypoint.py

...
@@ -5,7 +5,7 @@ from typing import Tuple
import torch
from sglang.srt.layers.quantization.deep_gemm_wrapper import compile_utils
-from sglang.srt.layers.quantization.deep_gemm_wrapper.configurer import (
+from sglang.srt.layers.quantization.deep_gemm_wrapper.configurer import (  # noqa: F401
    DEEPGEMM_BLACKWELL,
    DEEPGEMM_SCALE_UE8M0,
    ENABLE_JIT_DEEPGEMM,
...

@@ -17,7 +17,7 @@ logger = logging.getLogger(__name__)
if ENABLE_JIT_DEEPGEMM:
    import deep_gemm
-    from deep_gemm.utils.layout import get_mn_major_tma_aligned_tensor
+    from deep_gemm.utils.layout import get_mn_major_tma_aligned_tensor  # noqa: F401
_SANITY_CHECK = get_bool_env_var("SGLANG_DEEPGEMM_SANITY_CHECK")
...
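The configurer imports above look like the re-export case: entrypoint.py imports flags such as `DEEPGEMM_BLACKWELL` so other modules can pull them from the wrapper, which leaves the names unused locally and triggers F401 unless suppressed. A small sketch of that re-export pattern with stand-in names:

```python
# re_export_sketch.py -- illustration only; `getcwd` stands in for a flag or
# helper that this module re-exports. It is never referenced here, so ruff
# would flag it as F401 without the suppression, but downstream code can do
# `from re_export_sketch import get_working_dir`.
from os import getcwd as get_working_dir  # noqa: F401
```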
python/sglang/srt/layers/quantization/fp8_kernel.py

...
@@ -67,7 +67,7 @@ if _is_hip:
            raise ImportError("aiter is required when SGLANG_USE_AITER is set to True")
    else:
        try:
-            import vllm._C
+            import vllm._C  # noqa: F401
        except ImportError:
            raise ImportError("vllm is required when SGLANG_USE_AITER is set to False")
...
python/sglang/srt/layers/quantization/fpgemm_fp8.py

...
@@ -11,7 +11,6 @@ from torch.nn.parameter import Parameter
from sglang.srt.layers.linear import LinearBase
from sglang.srt.layers.parameter import ChannelQuantScaleParameter, ModelWeightParameter
from sglang.srt.layers.quantization.base_config import (
    FusedMoEMethodBase,
    LinearMethodBase,
    QuantizationConfig,
    QuantizeMethodBase,
...

@@ -28,7 +27,7 @@ from sglang.srt.layers.quantization.marlin_utils_fp8 import (
    prepare_fp8_layer_for_marlin,
)
from sglang.srt.layers.quantization.unquant import UnquantizedLinearMethod
-from sglang.srt.layers.quantization.utils import is_layer_skipped, replace_parameter
+from sglang.srt.layers.quantization.utils import is_layer_skipped
from sglang.srt.utils import get_bool_env_var, is_cuda
_is_cuda = is_cuda()
...
python/sglang/srt/layers/quantization/gptq.py

...
@@ -199,7 +199,6 @@ class GPTQConfig(QuantizationConfig):
        self, layer: torch.nn.Module, prefix: str
    ) -> Optional[LinearMethodBase]:
        # Delay the import to avoid circular dependency
        from sglang.srt.layers.linear import LinearBase
        from sglang.srt.layers.moe.fused_moe_triton import FusedMoE
        if isinstance(layer, FusedMoE):
...