Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
c16b33cc
Unverified
Commit
c16b33cc
authored
Mar 18, 2025
by
Yineng Zhang
Committed by
GitHub
Mar 18, 2025
Browse files
cleanup deps 3/n (#4541)
parent
2d004512
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
16 additions
and
12 deletions
+16
-12
python/sglang/srt/layers/quantization/fp8.py
python/sglang/srt/layers/quantization/fp8.py
+0
-4
python/sglang/srt/models/deepseek_nextn.py
python/sglang/srt/models/deepseek_nextn.py
+2
-1
python/sglang/srt/models/deepseek_v2.py
python/sglang/srt/models/deepseek_v2.py
+2
-1
python/sglang/srt/utils.py
python/sglang/srt/utils.py
+12
-6
No files found.
python/sglang/srt/layers/quantization/fp8.py
View file @
c16b33cc
...
@@ -152,8 +152,6 @@ class Fp8Config(QuantizationConfig):
...
@@ -152,8 +152,6 @@ class Fp8Config(QuantizationConfig):
def
get_quant_method
(
def
get_quant_method
(
self
,
layer
:
torch
.
nn
.
Module
,
prefix
:
str
self
,
layer
:
torch
.
nn
.
Module
,
prefix
:
str
)
->
Optional
[
"QuantizeMethodBase"
]:
)
->
Optional
[
"QuantizeMethodBase"
]:
from
vllm.attention.layer
import
Attention
# Avoid circular import
from
sglang.srt.layers.moe.fused_moe_triton
import
FusedMoE
from
sglang.srt.layers.moe.fused_moe_triton
import
FusedMoE
if
isinstance
(
layer
,
LinearBase
):
if
isinstance
(
layer
,
LinearBase
):
...
@@ -162,8 +160,6 @@ class Fp8Config(QuantizationConfig):
...
@@ -162,8 +160,6 @@ class Fp8Config(QuantizationConfig):
return
Fp8LinearMethod
(
self
)
return
Fp8LinearMethod
(
self
)
elif
isinstance
(
layer
,
FusedMoE
):
elif
isinstance
(
layer
,
FusedMoE
):
return
Fp8MoEMethod
(
self
)
return
Fp8MoEMethod
(
self
)
elif
isinstance
(
layer
,
Attention
):
return
Fp8KVCacheMethod
(
self
)
return
None
return
None
def
get_scaled_act_names
(
self
)
->
List
[
str
]:
def
get_scaled_act_names
(
self
)
->
List
[
str
]:
...
...
python/sglang/srt/models/deepseek_nextn.py
View file @
c16b33cc
...
@@ -18,7 +18,6 @@ from typing import Iterable, Optional, Tuple
...
@@ -18,7 +18,6 @@ from typing import Iterable, Optional, Tuple
import
torch
import
torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
from
vllm
import
_custom_ops
as
ops
from
sglang.srt.layers.layernorm
import
RMSNorm
from
sglang.srt.layers.layernorm
import
RMSNorm
from
sglang.srt.layers.linear
import
ReplicatedLinear
from
sglang.srt.layers.linear
import
ReplicatedLinear
...
@@ -48,6 +47,8 @@ _is_cuda = is_cuda()
...
@@ -48,6 +47,8 @@ _is_cuda = is_cuda()
if
_is_cuda
:
if
_is_cuda
:
from
sgl_kernel
import
awq_dequantize
from
sgl_kernel
import
awq_dequantize
else
:
from
vllm
import
_custom_ops
as
ops
class
DeepseekModelNextN
(
nn
.
Module
):
class
DeepseekModelNextN
(
nn
.
Module
):
...
...
python/sglang/srt/models/deepseek_v2.py
View file @
c16b33cc
...
@@ -23,7 +23,6 @@ import torch
...
@@ -23,7 +23,6 @@ import torch
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
from
torch
import
nn
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
from
vllm
import
_custom_ops
as
ops
from
sglang.srt.distributed
import
(
from
sglang.srt.distributed
import
(
get_tensor_model_parallel_world_size
,
get_tensor_model_parallel_world_size
,
...
@@ -75,6 +74,8 @@ _is_cuda = is_cuda()
...
@@ -75,6 +74,8 @@ _is_cuda = is_cuda()
if
_is_cuda
:
if
_is_cuda
:
from
sgl_kernel
import
awq_dequantize
,
bmm_fp8
from
sgl_kernel
import
awq_dequantize
,
bmm_fp8
else
:
from
vllm
import
_custom_ops
as
ops
class
DeepseekV2MLP
(
nn
.
Module
):
class
DeepseekV2MLP
(
nn
.
Module
):
...
...
python/sglang/srt/utils.py
View file @
c16b33cc
...
@@ -531,7 +531,10 @@ def load_image(image_file: Union[str, bytes]):
...
@@ -531,7 +531,10 @@ def load_image(image_file: Union[str, bytes]):
def
suppress_other_loggers
():
def
suppress_other_loggers
():
from
vllm.logger
import
logger
as
vllm_default_logger
try
:
from
vllm.logger
import
logger
as
vllm_default_logger
except
ImportError
:
return
vllm_default_logger
.
setLevel
(
logging
.
WARN
)
vllm_default_logger
.
setLevel
(
logging
.
WARN
)
logging
.
getLogger
(
"vllm.distributed.device_communicators.pynccl"
).
setLevel
(
logging
.
getLogger
(
"vllm.distributed.device_communicators.pynccl"
).
setLevel
(
...
@@ -620,11 +623,14 @@ def monkey_patch_p2p_access_check():
...
@@ -620,11 +623,14 @@ def monkey_patch_p2p_access_check():
def
monkey_patch_vllm_gguf_config
():
def
monkey_patch_vllm_gguf_config
():
from
vllm.model_executor.layers.quantization.gguf
import
(
try
:
GGUFConfig
,
from
vllm.model_executor.layers.quantization.gguf
import
(
GGUFEmbeddingMethod
,
GGUFConfig
,
GGUFLinearMethod
,
GGUFEmbeddingMethod
,
)
GGUFLinearMethod
,
)
except
ImportError
:
return
from
sglang.srt.layers.linear
import
LinearBase
from
sglang.srt.layers.linear
import
LinearBase
from
sglang.srt.layers.vocab_parallel_embedding
import
VocabParallelEmbedding
from
sglang.srt.layers.vocab_parallel_embedding
import
VocabParallelEmbedding
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment