sglang · Commit 62797440 (unverified)
Authored Oct 17, 2025 by Chang Su; committed by GitHub on Oct 17, 2025.
[Lint] Add `python/sglang` to ruff F401 checks and remove unused imports in files (#11685)
Parent: 2614adf9
Changes: 150 files in this commit; this page shows 20 changed files with 19 additions and 101 deletions (page 1 of 8).
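For context, F401 is the pyflakes rule (exposed through ruff) that reports names imported but never used in a module. Below is a minimal sketch of what the check flags, using a hypothetical module rather than code from this commit; the `# noqa: F401` waiver is the same pattern the gpt_oss.py hunk further down uses to keep an import the checker would otherwise report:

    # demo.py -- hypothetical module illustrating ruff's F401 rule.
    import math  # flagged by F401: imported but never referenced below
    import os  # noqa: F401  # kept on purpose; the comment waives the rule


    def double(x: float) -> float:
        # `ruff check demo.py --select F401` reports only `math`, because
        # the `os` import carries an explicit noqa waiver.
        return 2 * x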
python/sglang/srt/models/gemma3n_mm.py (+1, -2)
python/sglang/srt/models/glm4_moe.py (+0, -6)
python/sglang/srt/models/gpt_oss.py (+1, -1)
python/sglang/srt/models/hunyuan.py (+2, -7)
python/sglang/srt/models/interns1.py (+0, -1)
python/sglang/srt/models/llama_eagle3.py (+1, -1)
python/sglang/srt/models/longcat_flash.py (+1, -14)
python/sglang/srt/models/longcat_flash_nextn.py (+2, -13)
python/sglang/srt/models/mimo.py (+2, -13)
python/sglang/srt/models/mimo_mtp.py (+1, -2)
python/sglang/srt/models/minicpmo.py (+0, -3)
python/sglang/srt/models/mixtral.py (+0, -1)
python/sglang/srt/models/opt.py (+1, -5)
python/sglang/srt/models/phi.py (+1, -1)
python/sglang/srt/models/phi4mm.py (+1, -1)
python/sglang/srt/models/phimoe.py (+0, -1)
python/sglang/srt/models/pixtral.py (+0, -3)
python/sglang/srt/models/qwen.py (+0, -1)
python/sglang/srt/models/qwen2_audio.py (+2, -15)
python/sglang/srt/models/qwen3_next.py (+3, -10)
python/sglang/srt/models/gemma3n_mm.py (+1, -2)

@@ -14,8 +14,7 @@ from transformers import (
 )
 from transformers.models.auto.modeling_auto import AutoModel

-from sglang.srt.layers.layernorm import RMSNorm
-from sglang.srt.layers.linear import RowParallelLinear
+from sglang.srt.layers.linear import ColumnParallelLinear, RowParallelLinear
 from sglang.srt.layers.logits_processor import LogitsProcessor
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
 from sglang.srt.layers.vocab_parallel_embedding import VocabParallelEmbedding
python/sglang/srt/models/glm4_moe.py (+0, -6)

@@ -44,10 +44,8 @@ from sglang.srt.layers.dp_attention import (
 )
 from sglang.srt.layers.layernorm import RMSNorm
 from sglang.srt.layers.linear import (
-    ColumnParallelLinear,
     MergedColumnParallelLinear,
     QKVParallelLinear,
-    ReplicatedLinear,
     RowParallelLinear,
 )
 from sglang.srt.layers.logits_processor import LogitsProcessor

@@ -78,16 +76,12 @@ from sglang.srt.utils import (
     BumpAllocator,
     LazyValue,
     add_prefix,
-    bind_or_assign,
     cpu_has_amx_support,
     get_bool_env_var,
     get_device_sm,
-    get_int_env_var,
     is_cpu,
     is_cuda,
-    is_flashinfer_available,
     is_hip,
-    is_non_idle_and_non_empty,
     log_info_on_rank0,
     use_intel_amx_backend,
 )
python/sglang/srt/models/gpt_oss.py (+1, -1)

@@ -85,7 +85,7 @@ _is_sm100_supported = is_cuda() and is_sm100_supported()
 if _is_cuda:
-    from sgl_kernel import FusedSetKVBufferArg
+    from sgl_kernel import FusedSetKVBufferArg  # noqa: F401


 class GptOssConfig(PretrainedConfig):
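Note the fix here: rather than deleting the seemingly unused `FusedSetKVBufferArg` import, the commit keeps it and adds `# noqa: F401`, presumably because the name is re-exported or consumed indirectly elsewhere even though it looks unused in this module.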
python/sglang/srt/models/hunyuan.py (+2, -7)

@@ -12,18 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Inference-only HunYuan model compatible with HuggingFace weights."""
-import logging
 import re
-from dataclasses import dataclass
-from enum import Enum, auto
-from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
+from typing import Any, Dict, Iterable, Optional, Tuple

 import torch
 from torch import nn
 from transformers import PretrainedConfig

 from sglang.srt.distributed import (
-    get_pp_group,
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
     tensor_model_parallel_all_reduce,

@@ -46,7 +42,6 @@ from sglang.srt.layers.radix_attention import RadixAttention
 from sglang.srt.layers.rotary_embedding import get_rope
 from sglang.srt.layers.sampler import Sampler
 from sglang.srt.layers.vocab_parallel_embedding import (
-    DEFAULT_VOCAB_PADDING_SIZE,
     ParallelLMHead,
     VocabParallelEmbedding,
 )

@@ -56,7 +51,7 @@ from sglang.srt.model_loader.weight_utils import (
     kv_cache_scales_loader,
     maybe_remap_kv_scale_name,
 )
-from sglang.srt.utils import add_prefix, is_hip
+from sglang.srt.utils import is_hip


 expert_distribution_recorder = ExpertDistributionRecorder()
python/sglang/srt/models/interns1.py (+0, -1)

@@ -5,7 +5,6 @@ from torch import nn
 from transformers import PretrainedConfig

 from sglang.srt.layers.attention import vision_utils
-from sglang.srt.layers.moe.ep_moe.layer import get_moe_impl_class
 from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoE
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
 from sglang.srt.managers.mm_utils import (
python/sglang/srt/models/llama_eagle3.py (+1, -1)

@@ -27,7 +27,7 @@ from transformers import LlamaConfig
 from sglang.srt.distributed import get_pp_group
 from sglang.srt.layers.layernorm import RMSNorm
-from sglang.srt.layers.linear import QKVParallelLinear, RowParallelLinear
+from sglang.srt.layers.linear import QKVParallelLinear
 from sglang.srt.layers.logits_processor import LogitsProcessor
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
 from sglang.srt.layers.vocab_parallel_embedding import (
python/sglang/srt/models/longcat_flash.py (+1, -14)

@@ -44,9 +44,7 @@ from sglang.srt.distributed import (
 )
 from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder
 from sglang.srt.eplb.expert_location import ModelConfigForExpertLocation
-from sglang.srt.eplb.expert_location_dispatch import ExpertLocationDispatchInfo
 from sglang.srt.layers.activation import SiluAndMul
-from sglang.srt.layers.amx_utils import PackWeightMethod
 from sglang.srt.layers.communicator import LayerCommunicator, LayerScatterModes
 from sglang.srt.layers.dp_attention import (
     get_attention_tp_rank,

@@ -87,20 +85,15 @@ from sglang.srt.models.deepseek_v2 import DeepseekV2AttentionMLA
 from sglang.srt.server_args import get_global_server_args
 from sglang.srt.utils import (
     BumpAllocator,
-    LazyValue,
     add_prefix,
     bind_or_assign,
     cpu_has_amx_support,
     get_bool_env_var,
     get_device_sm,
-    get_int_env_var,
     is_cpu,
     is_cuda,
-    is_flashinfer_available,
     is_hip,
-    is_non_idle_and_non_empty,
     is_npu,
-    is_sm100_supported,
 )

 _is_hip = is_hip()

@@ -113,13 +106,7 @@ _is_cpu = is_cpu()
 _device_sm = get_device_sm()

 if _is_cuda:
-    from sgl_kernel import (
-        awq_dequantize,
-        bmm_fp8,
-        dsv3_fused_a_gemm,
-        dsv3_router_gemm,
-        merge_state_v2,
-    )
+    from sgl_kernel import awq_dequantize
 elif _is_cpu and _is_cpu_amx_available:
     pass
 elif _is_hip:
python/sglang/srt/models/longcat_flash_nextn.py (+2, -13)

@@ -32,14 +32,10 @@
 import concurrent.futures
 import logging
-import os
-from enum import IntEnum, auto
-from typing import Any, Dict, Iterable, Optional, Tuple, Union
+from typing import Iterable, Optional, Tuple

 import torch
-import torch.nn.functional as F
 from torch import nn
-from tqdm import tqdm

 from sglang.srt.configs import LongcatFlashConfig
 from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder

@@ -75,7 +71,6 @@ from sglang.srt.models.deepseek_v2 import DeepseekV2AttentionMLA
 from sglang.srt.models.longcat_flash import LongcatFlashForCausalLM, LongcatFlashMLP
 from sglang.srt.utils import (
     BumpAllocator,
-    LazyValue,
     add_prefix,
     bind_or_assign,
     cpu_has_amx_support,

@@ -97,13 +92,7 @@ _is_cpu = is_cpu()
 _device_sm = get_device_sm()

 if _is_cuda:
-    from sgl_kernel import (
-        awq_dequantize,
-        bmm_fp8,
-        dsv3_fused_a_gemm,
-        dsv3_router_gemm,
-        merge_state_v2,
-    )
+    from sgl_kernel import awq_dequantize
 elif _is_cpu and _is_cpu_amx_available:
     pass
 elif _is_hip:
python/sglang/srt/models/mimo.py (+2, -13)

 # Adapted from qwen2.py
-from functools import partial
-from typing import Any, Dict, Iterable, Optional, Tuple
+from typing import Iterable, Optional, Tuple

 import torch
 from torch import nn

-from sglang.srt.distributed import (
-    get_tensor_model_parallel_rank,
-    get_tensor_model_parallel_world_size,
-    split_tensor_along_last_dim,
-    tensor_model_parallel_all_gather,
-)
-from sglang.srt.layers.layernorm import RMSNorm
-from sglang.srt.layers.linear import QKVParallelLinear, RowParallelLinear
 from sglang.srt.layers.logits_processor import LogitsProcessor
 from sglang.srt.layers.pooler import Pooler, PoolingType
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
-from sglang.srt.layers.radix_attention import RadixAttention
-from sglang.srt.layers.rotary_embedding import get_rope
 from sglang.srt.layers.vocab_parallel_embedding import ParallelLMHead
 from sglang.srt.model_executor.forward_batch_info import ForwardBatch
 from sglang.srt.model_loader.weight_utils import default_weight_loader
-from sglang.srt.models.qwen2 import Qwen2DecoderLayer, Qwen2MLP, Qwen2Model
+from sglang.srt.models.qwen2 import Qwen2DecoderLayer, Qwen2Model
 from sglang.srt.utils import add_prefix

 MiMoConfig = None
python/sglang/srt/models/mimo_mtp.py (+1, -2)

 # Adapted from https://github.com/vllm-project/vllm/pull/17433/files and deepseek_nextn.py
-from functools import partial
-from typing import Any, Dict, Iterable, Optional, Tuple
+from typing import Iterable, Optional, Tuple

 import torch
 from torch import nn
python/sglang/srt/models/minicpmo.py (+0, -3)

@@ -43,7 +43,6 @@ from sglang.srt.managers.mm_utils import (
     general_mm_embed_routine,
 )
 from sglang.srt.managers.schedule_batch import (
-    Modality,
     MultimodalDataItem,
     MultimodalInputs,
     flatten_nested_list,

@@ -59,8 +58,6 @@ from sglang.srt.utils import logger
 try:
     from transformers import LogitsWarper
     from vector_quantize_pytorch import GroupedResidualFSQ
-    from vocos import Vocos
-    from vocos.pretrained import instantiate_class

     _tts_deps = True
 except:
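A side note on the `try:` block above: it doubles as an availability probe for the optional TTS dependencies, recording the outcome in `_tts_deps`. The commit can drop `Vocos` and `instantiate_class` because nothing references them any longer; a probe import that is genuinely unused after the `try:` would instead need a waiver, roughly like this hypothetical sketch:

    # Hypothetical availability probe: import solely to check that the
    # optional dependency is installed, then record the result in a flag.
    try:
        import vocos  # noqa: F401  # probe only; waive the unused-import rule
        _tts_deps = True
    except ImportError:
        _tts_deps = False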
python/sglang/srt/models/mixtral.py (+0, -1)

@@ -24,7 +24,6 @@ from torch import nn
 from transformers import MixtralConfig

 from sglang.srt.distributed import (
-    get_moe_expert_parallel_world_size,
     get_pp_group,
     get_tensor_model_parallel_world_size,
     tensor_model_parallel_all_reduce,
python/sglang/srt/models/opt.py (+1, -5)

@@ -17,7 +17,6 @@ from collections.abc import Iterable
 from typing import Optional, Union

 import torch
-import torch.nn.functional as F
 from torch import nn
 from transformers import OPTConfig

@@ -26,10 +25,8 @@ from sglang.srt.distributed import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
 )
-from sglang.srt.layers.activation import get_act_fn
 from sglang.srt.layers.linear import (
     ColumnParallelLinear,
-    MergedColumnParallelLinear,
     QKVParallelLinear,
     ReplicatedLinear,
     RowParallelLinear,

@@ -38,7 +35,7 @@ from sglang.srt.layers.logits_processor import LogitsProcessor, LogitsProcessorOutput
 from sglang.srt.layers.pooler import Pooler, PoolingType
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
 from sglang.srt.layers.radix_attention import RadixAttention
-from sglang.srt.layers.utils import PPMissingLayer, get_layer_id
+from sglang.srt.layers.utils import get_layer_id
 from sglang.srt.layers.vocab_parallel_embedding import (
     ParallelLMHead,
     VocabParallelEmbedding,

@@ -47,7 +44,6 @@ from sglang.srt.model_executor.forward_batch_info import ForwardBatch, PPProxyTensors
 from sglang.srt.model_loader.weight_utils import (
     default_weight_loader,
     kv_cache_scales_loader,
-    maybe_remap_kv_scale_name,
 )
 from sglang.srt.utils import add_prefix, make_layers
python/sglang/srt/models/phi.py (+1, -1)

 # Adapted from https://github.com/vllm-project/vllm/blob/main/vllm/model_executor/models/phi.py
-from typing import Iterable, Optional, Union
+from typing import Iterable, Optional

 import torch
 from torch import nn
python/sglang/srt/models/phi4mm.py (+1, -1)

@@ -24,7 +24,7 @@ from typing import List, Optional, Tuple
 import numpy as np
 import torch
 from torch import nn
-from transformers import PretrainedConfig, SiglipVisionConfig
+from transformers import PretrainedConfig

 from sglang.srt.layers.quantization import QuantizationConfig
 from sglang.srt.managers.mm_utils import (
python/sglang/srt/models/phimoe.py (+0, -1)

@@ -18,7 +18,6 @@ from sglang.srt.layers.pooler import Pooler, PoolingType
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
 from sglang.srt.layers.radix_attention import RadixAttention
 from sglang.srt.layers.rotary_embedding import get_rope
-from sglang.srt.layers.utils import PPMissingLayer
 from sglang.srt.layers.vocab_parallel_embedding import (
     DEFAULT_VOCAB_PADDING_SIZE,
     ParallelLMHead,
python/sglang/srt/models/pixtral.py (+0, -3)

@@ -16,13 +16,10 @@
 Using mistral-community/pixtral-12b as reference.
 """
-import logging
-import math
 from typing import Iterable, List, Optional, Set, Tuple, Union

 import torch
 import torch.nn as nn
-import torch.nn.functional as F
 from transformers import PixtralVisionConfig, PretrainedConfig
 from transformers.models.pixtral.modeling_pixtral import PixtralRotaryEmbedding
 from transformers.models.pixtral.modeling_pixtral import (
python/sglang/srt/models/qwen.py (+0, -1)

@@ -15,7 +15,6 @@
 # Adapted from
 # https://github.com/vllm-project/vllm/blob/c7f2cf2b7f67bce5842fedfdba508440fe257375/vllm/model_executor/models/qwen.py#L1
-import time
 from typing import Any, Dict, Iterable, Optional, Tuple

 import torch
python/sglang/srt/models/qwen2_audio.py (+2, -15)

@@ -23,30 +23,18 @@
 # limitations under the License.
 """Inference-only Qwen2-Audio model compatible with HuggingFace weights."""
 import logging
-import math
-from functools import lru_cache, partial
-from typing import Any, Iterable, List, Optional, Tuple, Type, TypedDict
+from typing import Any, Iterable, List, Optional, Tuple

 import torch
 import torch.nn as nn
-import torch.nn.functional as F
-from einops import rearrange
-from transformers import AutoTokenizer, Qwen2AudioEncoderConfig, Qwen2Config
-from transformers.activations import ACT2FN
+from transformers import Qwen2AudioEncoderConfig, Qwen2Config
 from transformers.models.qwen2_audio.configuration_qwen2_audio import Qwen2AudioConfig
 from transformers.models.qwen2_audio.modeling_qwen2_audio import (
     Qwen2AudioEncoder,
     Qwen2AudioMultiModalProjector,
 )

-from sglang.srt.layers.activation import QuickGELU
-from sglang.srt.layers.attention.vision import VisionAttention
-from sglang.srt.layers.linear import ColumnParallelLinear, RowParallelLinear
-from sglang.srt.layers.logits_processor import LogitsProcessor
-from sglang.srt.layers.pooler import Pooler, PoolingType
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
-from sglang.srt.layers.utils import get_layer_id
-from sglang.srt.layers.vocab_parallel_embedding import ParallelLMHead
 from sglang.srt.managers.mm_utils import (
     MultiModalityDataPaddingPatternMultimodalTokens,
     general_mm_embed_routine,

@@ -60,7 +48,6 @@ from sglang.srt.model_executor.forward_batch_info import ForwardBatch
 from sglang.srt.model_loader.weight_utils import default_weight_loader
 from sglang.srt.models.qwen2 import Qwen2ForCausalLM
 from sglang.srt.utils import add_prefix
-from sglang.srt.utils.hf_transformers_utils import get_processor


 logger = logging.getLogger(__name__)
python/sglang/srt/models/qwen3_next.py
View file @
62797440
import
enum
import
enum
import
logging
import
logging
from
typing
import
Any
,
Dict
,
Iterable
,
Optional
,
Set
,
Tuple
from
typing
import
Any
,
Iterable
,
Optional
,
Set
,
Tuple
import
torch
import
torch
import
torch.nn.functional
as
F
from
torch
import
nn
from
torch
import
nn
from
sglang.srt.configs.qwen3_next
import
Qwen3NextConfig
from
sglang.srt.configs.qwen3_next
import
Qwen3NextConfig
from
sglang.srt.distributed
import
(
from
sglang.srt.distributed
import
divide
,
get_pp_group
divide
,
get_pp_group
,
get_tensor_model_parallel_rank
,
get_tensor_model_parallel_world_size
,
)
from
sglang.srt.eplb.expert_distribution
import
get_global_expert_distribution_recorder
from
sglang.srt.eplb.expert_distribution
import
get_global_expert_distribution_recorder
from
sglang.srt.eplb.expert_location
import
ModelConfigForExpertLocation
from
sglang.srt.eplb.expert_location
import
ModelConfigForExpertLocation
from
sglang.srt.layers.attention.fla.layernorm_gated
import
RMSNorm
as
RMSNormGated
from
sglang.srt.layers.attention.fla.layernorm_gated
import
RMSNorm
as
RMSNormGated
...
@@ -23,10 +17,9 @@ from sglang.srt.layers.dp_attention import (
...
@@ -23,10 +17,9 @@ from sglang.srt.layers.dp_attention import (
get_attention_tp_size
,
get_attention_tp_size
,
is_dp_attention_enabled
,
is_dp_attention_enabled
,
)
)
from
sglang.srt.layers.layernorm
import
GemmaRMSNorm
,
RMSNorm
from
sglang.srt.layers.layernorm
import
GemmaRMSNorm
from
sglang.srt.layers.linear
import
(
from
sglang.srt.layers.linear
import
(
ColumnParallelLinear
,
ColumnParallelLinear
,
MergedColumnParallelLinear
,
QKVParallelLinear
,
QKVParallelLinear
,
RowParallelLinear
,
RowParallelLinear
,
)
)
...
...