Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
62797440
Unverified
Commit
62797440
authored
Oct 17, 2025
by
Chang Su
Committed by
GitHub
Oct 17, 2025
Browse files
[Lint] Add `python/sglang` to ruff F401 checks and remove unused imports in files (#11685)
parent
2614adf9
Changes
150
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
23 additions
and
67 deletions
+23
-67
python/sglang/srt/layers/quantization/int8_kernel.py
python/sglang/srt/layers/quantization/int8_kernel.py
+1
-1
python/sglang/srt/layers/quantization/modelopt_quant.py
python/sglang/srt/layers/quantization/modelopt_quant.py
+1
-10
python/sglang/srt/layers/quantization/petit.py
python/sglang/srt/layers/quantization/petit.py
+1
-1
python/sglang/srt/layers/quantization/quark/quark_moe.py
python/sglang/srt/layers/quantization/quark/quark_moe.py
+3
-3
python/sglang/srt/layers/quantization/quark/schemes/quark_w4a4_mxfp4.py
...srt/layers/quantization/quark/schemes/quark_w4a4_mxfp4.py
+0
-7
python/sglang/srt/layers/quantization/utils.py
python/sglang/srt/layers/quantization/utils.py
+0
-1
python/sglang/srt/layers/quantization/w4afp8.py
python/sglang/srt/layers/quantization/w4afp8.py
+4
-5
python/sglang/srt/layers/quantization/w8a8_int8.py
python/sglang/srt/layers/quantization/w8a8_int8.py
+2
-18
python/sglang/srt/layers/utils.py
python/sglang/srt/layers/utils.py
+0
-1
python/sglang/srt/lora/backend/triton_backend.py
python/sglang/srt/lora/backend/triton_backend.py
+0
-1
python/sglang/srt/lora/eviction_policy.py
python/sglang/srt/lora/eviction_policy.py
+1
-1
python/sglang/srt/lora/lora_manager.py
python/sglang/srt/lora/lora_manager.py
+1
-1
python/sglang/srt/managers/cache_controller.py
python/sglang/srt/managers/cache_controller.py
+3
-4
python/sglang/srt/managers/schedule_batch.py
python/sglang/srt/managers/schedule_batch.py
+1
-3
python/sglang/srt/managers/scheduler_metrics_mixin.py
python/sglang/srt/managers/scheduler_metrics_mixin.py
+1
-4
python/sglang/srt/managers/tokenizer_manager.py
python/sglang/srt/managers/tokenizer_manager.py
+0
-1
python/sglang/srt/managers/utils.py
python/sglang/srt/managers/utils.py
+1
-2
python/sglang/srt/mem_cache/allocator_ascend.py
python/sglang/srt/mem_cache/allocator_ascend.py
+1
-1
python/sglang/srt/mem_cache/base_prefix_cache.py
python/sglang/srt/mem_cache/base_prefix_cache.py
+1
-1
python/sglang/srt/mem_cache/evict_policy.py
python/sglang/srt/mem_cache/evict_policy.py
+1
-1
No files found.
python/sglang/srt/layers/quantization/int8_kernel.py
View file @
62797440
...
...
@@ -8,7 +8,7 @@ import torch
import
triton
import
triton.language
as
tl
from
sglang.srt.utils
import
get_bool_env_var
,
get_device_name
,
is_cuda
from
sglang.srt.utils
import
get_device_name
,
is_cuda
_is_cuda
=
is_cuda
()
if
_is_cuda
:
...
...
python/sglang/srt/layers/quantization/modelopt_quant.py
View file @
62797440
...
...
@@ -1059,16 +1059,7 @@ class ModelOptNvFp4FusedMoEMethod(FusedMoEMethodBase):
intermediate_size
,
num_experts
,
):
from
flashinfer
import
(
RoutingMethodType
,
e2m1_and_ufp8sf_scale_to_float
,
fp4_quantize
,
next_positive_power_of_2
,
nvfp4_block_scale_interleave
,
reorder_rows_for_gated_act_gemm
,
shuffle_matrix_a
,
shuffle_matrix_sf_a
,
)
from
flashinfer
import
nvfp4_block_scale_interleave
from
flashinfer.fused_moe.core
import
(
_maybe_get_cached_w2_permute_indices
,
_maybe_get_cached_w3_w1_permute_indices
,
...
...
python/sglang/srt/layers/quantization/petit.py
View file @
62797440
...
...
@@ -2,7 +2,7 @@
import
logging
from
typing
import
Any
,
Callable
,
Dict
,
List
,
Optional
from
typing
import
Any
,
Dict
,
List
,
Optional
import
regex
as
re
import
torch
...
...
python/sglang/srt/layers/quantization/quark/quark_moe.py
View file @
62797440
...
...
@@ -3,16 +3,16 @@
from
__future__
import
annotations
import
logging
from
typing
import
TYPE_CHECKING
,
Any
,
Callable
,
Optional
from
typing
import
TYPE_CHECKING
,
Any
import
torch
from
aiter
import
ActivationType
,
QuantType
,
biased_grouped_topk
from
aiter
import
ActivationType
,
QuantType
from
aiter.fused_moe
import
fused_moe
from
aiter.utility.fp4_utils
import
e8m0_shuffle
from
sglang.srt.layers.moe
import
MoeRunnerConfig
from
sglang.srt.layers.quantization.base_config
import
FusedMoEMethodBase
from
sglang.srt.utils
import
get_bool_env_var
,
is_hip
,
mxfp_supported
,
set_weight_attrs
from
sglang.srt.utils
import
is_hip
,
set_weight_attrs
if
TYPE_CHECKING
:
from
sglang.srt.layers.moe.token_dispatcher
import
(
...
...
python/sglang/srt/layers/quantization/quark/schemes/quark_w4a4_mxfp4.py
View file @
62797440
...
...
@@ -2,20 +2,13 @@
from
typing
import
Any
,
Callable
,
Optional
import
aiter
import
torch
import
torch.nn.functional
as
F
from
aiter.ops.gemm_op_a4w4
import
gemm_a4w4
from
aiter.ops.shuffle
import
shuffle_weight
from
aiter.ops.triton.gemm_afp4wfp4
import
gemm_afp4wfp4
from
aiter.ops.triton.gemm_afp4wfp4_pre_quant_atomic
import
gemm_afp4wfp4_pre_quant
from
aiter.ops.triton.quant
import
dynamic_mxfp4_quant
from
aiter.utility
import
dtypes
from
aiter.utility.fp4_utils
import
e8m0_shuffle
from
sglang.srt.layers.parameter
import
GroupQuantScaleParameter
,
PackedvLLMParameter
from
sglang.srt.layers.quantization.quark.schemes
import
QuarkScheme
from
sglang.srt.utils
import
get_bool_env_var
__all__
=
[
"QuarkW4A4MXFP4"
]
...
...
python/sglang/srt/layers/quantization/utils.py
View file @
62797440
...
...
@@ -11,7 +11,6 @@ import numpy
import
torch
from
sglang.srt.layers.quantization.fp8_kernel
import
scaled_fp8_quant
from
sglang.srt.utils
import
is_cuda
if
TYPE_CHECKING
:
from
sglang.srt.layers.quantization.base_config
import
QuantizationConfig
...
...
python/sglang/srt/layers/quantization/w4afp8.py
View file @
62797440
from
__future__
import
annotations
import
logging
from
typing
import
TYPE_CHECKING
,
Any
,
Callable
,
Dict
,
List
,
Optional
,
Tuple
from
typing
import
TYPE_CHECKING
,
Any
,
Dict
,
List
,
Optional
import
torch
from
torch.nn
import
Module
from
torch.nn.parameter
import
Parameter
from
sglang.srt.distributed.parallel_state
import
get_moe_expert_parallel_world_size
from
sglang.srt.layers.linear
import
LinearBase
,
UnquantizedLinearMethod
from
sglang.srt.layers.linear
import
UnquantizedLinearMethod
from
sglang.srt.layers.quantization.base_config
import
(
FusedMoEMethodBase
,
QuantizationConfig
,
...
...
@@ -17,11 +16,11 @@ from sglang.srt.layers.quantization.base_config import (
from
sglang.srt.layers.quantization.fp8
import
Fp8LinearMethod
from
sglang.srt.layers.quantization.unquant
import
UnquantizedLinearMethod
from
sglang.srt.layers.quantization.utils
import
is_layer_skipped
from
sglang.srt.utils
import
is_npu
,
set_weight_attrs
from
sglang.srt.utils
import
set_weight_attrs
if
TYPE_CHECKING
:
from
sglang.srt.layers.moe
import
MoeRunnerConfig
from
sglang.srt.layers.moe.ep_moe.layer
import
DeepEPMoE
,
EPMoE
from
sglang.srt.layers.moe.ep_moe.layer
import
DeepEPMoE
from
sglang.srt.layers.moe.token_dispatcher
import
(
CombineInput
,
DeepEPNormalOutput
,
...
...
python/sglang/srt/layers/quantization/w8a8_int8.py
View file @
62797440
from
__future__
import
annotations
import
importlib
import
sys
from
types
import
MappingProxyType
from
typing
import
(
TYPE_CHECKING
,
Any
,
Callable
,
Dict
,
List
,
Mapping
,
Optional
,
Tuple
,
Union
,
cast
,
)
from
typing
import
TYPE_CHECKING
,
Any
,
Dict
,
List
,
Mapping
,
Optional
,
Tuple
,
Union
,
cast
import
torch
from
torch.nn.parameter
import
Parameter
from
sglang.srt.distributed
import
(
get_tensor_model_parallel_rank
,
get_tensor_model_parallel_world_size
,
)
from
sglang.srt.distributed
import
get_tensor_model_parallel_world_size
from
sglang.srt.layers.amx_utils
import
_amx_process_weight_after_loading
from
sglang.srt.layers.moe
import
MoeRunner
,
MoeRunnerBackend
,
MoeRunnerConfig
from
sglang.srt.layers.moe.moe_runner.triton
import
TritonMoeQuantInfo
...
...
python/sglang/srt/layers/utils.py
View file @
62797440
import
logging
import
re
from
functools
import
lru_cache
import
torch
...
...
python/sglang/srt/lora/backend/triton_backend.py
View file @
62797440
...
...
@@ -11,7 +11,6 @@ from sglang.srt.lora.triton_ops import (
)
from
sglang.srt.lora.utils
import
LoRABatchInfo
from
sglang.srt.model_executor.forward_batch_info
import
ForwardBatch
from
sglang.srt.server_args
import
ServerArgs
class
TritonLoRABackend
(
BaseLoRABackend
):
...
...
python/sglang/srt/lora/eviction_policy.py
View file @
62797440
...
...
@@ -20,7 +20,7 @@ import logging
import
time
from
abc
import
ABC
,
abstractmethod
from
collections
import
OrderedDict
from
typing
import
Any
,
Dict
,
List
,
Optional
,
Set
from
typing
import
Optional
,
Set
logger
=
logging
.
getLogger
(
__name__
)
...
...
python/sglang/srt/lora/lora_manager.py
View file @
62797440
...
...
@@ -16,7 +16,7 @@
# and "Punica: Multi-Tenant LoRA Serving"
import
logging
from
typing
import
Dict
,
Iterable
,
List
,
Optional
,
Set
,
Tuple
from
typing
import
Dict
,
Iterable
,
List
,
Optional
import
torch
...
...
python/sglang/srt/managers/cache_controller.py
View file @
62797440
...
...
@@ -14,11 +14,10 @@ limitations under the License.
"""
import
logging
import
math
import
threading
import
time
from
queue
import
Empty
,
Full
,
PriorityQueue
,
Queue
from
typing
import
TYPE_CHECKING
,
List
,
NamedTuple
,
Optional
,
Set
,
Tuple
from
queue
import
Empty
,
Full
,
Queue
from
typing
import
TYPE_CHECKING
,
List
,
NamedTuple
,
Optional
import
torch
...
...
@@ -41,7 +40,7 @@ from sglang.srt.layers.dp_attention import (
get_attention_tp_size
,
is_dp_attention_enabled
,
)
from
sglang.srt.mem_cache.memory_pool
import
MHATokenToKVPool
,
MLATokenToKVPool
from
sglang.srt.mem_cache.memory_pool
import
MLATokenToKVPool
logger
=
logging
.
getLogger
(
__name__
)
...
...
python/sglang/srt/managers/schedule_batch.py
View file @
62797440
...
...
@@ -59,11 +59,10 @@ from sglang.srt.mem_cache.allocator import (
SWATokenToKVPoolAllocator
,
)
from
sglang.srt.mem_cache.base_prefix_cache
import
BasePrefixCache
from
sglang.srt.mem_cache.chunk_cache
import
ChunkCache
,
SWAChunkCache
from
sglang.srt.mem_cache.chunk_cache
import
SWAChunkCache
from
sglang.srt.mem_cache.common
import
(
alloc_for_decode
,
alloc_for_extend
,
alloc_token_slots
,
evict_from_tree_cache
,
)
from
sglang.srt.mem_cache.mamba_radix_cache
import
MambaRadixCache
...
...
@@ -76,7 +75,6 @@ from sglang.srt.sampling.sampling_batch_info import SamplingBatchInfo
from
sglang.srt.sampling.sampling_params
import
SamplingParams
from
sglang.srt.server_args
import
ServerArgs
,
get_global_server_args
from
sglang.srt.utils
import
flatten_nested_list
from
sglang.srt.utils.common
import
next_power_of_2
if
TYPE_CHECKING
:
from
sglang.srt.configs.model_config
import
ModelConfig
...
...
python/sglang/srt/managers/scheduler_metrics_mixin.py
View file @
62797440
...
...
@@ -3,13 +3,10 @@ from __future__ import annotations
import
logging
import
time
from
collections
import
defaultdict
from
typing
import
TYPE_CHECKING
,
Dict
,
List
,
Optional
,
Union
import
torch
from
typing
import
TYPE_CHECKING
,
List
,
Optional
from
sglang.srt.disaggregation.kv_events
import
EventPublisherFactory
,
KVEventBatch
from
sglang.srt.disaggregation.utils
import
DisaggregationMode
from
sglang.srt.managers.io_struct
import
TokenizedGenerateReqInput
from
sglang.srt.managers.schedule_policy
import
PrefillAdder
from
sglang.srt.managers.scheduler
import
Req
,
ScheduleBatch
from
sglang.srt.metrics.collector
import
SchedulerMetricsCollector
,
SchedulerStats
...
...
python/sglang/srt/managers/tokenizer_manager.py
View file @
62797440
...
...
@@ -16,7 +16,6 @@
import
asyncio
import
copy
import
dataclasses
import
json
import
logging
import
math
import
os
...
...
python/sglang/srt/managers/utils.py
View file @
62797440
from
__future__
import
annotations
import
logging
import
multiprocessing
as
mp
from
typing
import
TYPE_CHECKING
,
Dict
,
List
,
Optional
from
typing
import
TYPE_CHECKING
,
Optional
from
sglang.srt.layers.logits_processor
import
LogitsProcessorOutput
from
sglang.srt.managers.schedule_batch
import
Req
...
...
python/sglang/srt/mem_cache/allocator_ascend.py
View file @
62797440
...
...
@@ -92,7 +92,7 @@ class AscendPagedTokenToKVPoolAllocator(PagedTokenToKVPoolAllocator):
)
if
num_new_pages_item
<
200
:
import
sgl_kernel_npu
import
sgl_kernel_npu
# noqa: F401
torch
.
ops
.
npu
.
alloc_extend
(
prefix_lens
,
...
...
python/sglang/srt/mem_cache/base_prefix_cache.py
View file @
62797440
from
abc
import
ABC
,
abstractmethod
from
typing
import
TYPE_CHECKING
,
Any
,
List
,
NamedTuple
,
Optional
,
Tuple
from
typing
import
TYPE_CHECKING
,
Any
,
NamedTuple
,
Optional
,
Tuple
import
torch
...
...
python/sglang/srt/mem_cache/evict_policy.py
View file @
62797440
from
__future__
import
annotations
from
abc
import
ABC
,
abstractmethod
from
typing
import
TYPE_CHECKING
,
List
,
Tuple
,
Union
from
typing
import
TYPE_CHECKING
,
Tuple
,
Union
if
TYPE_CHECKING
:
from
sglang.srt.mem_cache.radix_cache
import
TreeNode
...
...
Prev
1
2
3
4
5
6
7
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment