Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
033c715b
Unverified
Commit
033c715b
authored
Jan 17, 2025
by
Yineng Zhang
Committed by
GitHub
Jan 17, 2025
Browse files
cleanup models dependencies 1/n (#2948)
parent
d06c1ab5
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
36 additions
and
46 deletions
+36
-46
python/sglang/srt/layers/moe/ep_moe/layer.py
python/sglang/srt/layers/moe/ep_moe/layer.py
+1
-1
python/sglang/srt/lora/lora.py
python/sglang/srt/lora/lora.py
+1
-9
python/sglang/srt/models/baichuan.py
python/sglang/srt/models/baichuan.py
+5
-5
python/sglang/srt/models/gpt2.py
python/sglang/srt/models/gpt2.py
+1
-2
python/sglang/srt/models/minicpm3.py
python/sglang/srt/models/minicpm3.py
+6
-6
python/sglang/srt/models/olmo2.py
python/sglang/srt/models/olmo2.py
+1
-1
python/sglang/srt/models/olmoe.py
python/sglang/srt/models/olmoe.py
+5
-6
python/sglang/srt/models/qwen2_vl.py
python/sglang/srt/models/qwen2_vl.py
+2
-2
python/sglang/srt/models/xverse.py
python/sglang/srt/models/xverse.py
+6
-6
python/sglang/srt/models/xverse_moe.py
python/sglang/srt/models/xverse_moe.py
+8
-8
No files found.
python/sglang/srt/layers/moe/ep_moe/layer.py
View file @
033c715b
...
@@ -5,7 +5,6 @@ import torch
...
@@ -5,7 +5,6 @@ import torch
from
torch.nn
import
Module
from
torch.nn
import
Module
from
vllm
import
_custom_ops
as
ops
from
vllm
import
_custom_ops
as
ops
from
vllm.model_executor.custom_op
import
CustomOp
from
vllm.model_executor.custom_op
import
CustomOp
from
vllm.model_executor.layers.quantization.fp8
import
Fp8Config
,
Fp8MoEMethod
from
sglang.srt.distributed
import
(
from
sglang.srt.distributed
import
(
get_tensor_model_parallel_rank
,
get_tensor_model_parallel_rank
,
...
@@ -25,6 +24,7 @@ from sglang.srt.layers.quantization.base_config import (
...
@@ -25,6 +24,7 @@ from sglang.srt.layers.quantization.base_config import (
QuantizationConfig
,
QuantizationConfig
,
QuantizeMethodBase
,
QuantizeMethodBase
,
)
)
from
sglang.srt.layers.quantization.fp8
import
Fp8Config
,
Fp8MoEMethod
from
sglang.srt.utils
import
is_hip
,
set_weight_attrs
from
sglang.srt.utils
import
is_hip
,
set_weight_attrs
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
...
...
python/sglang/srt/lora/lora.py
View file @
033c715b
...
@@ -19,18 +19,11 @@
...
@@ -19,18 +19,11 @@
# https://github.com/vllm-project/vllm/blob/4abf6336ec65c270343eb895e7b18786e9274176/vllm/lora/layers.py
# https://github.com/vllm-project/vllm/blob/4abf6336ec65c270343eb895e7b18786e9274176/vllm/lora/layers.py
import
json
import
os
import
re
import
re
from
typing
import
Any
,
Dict
,
List
,
Optional
,
Tuple
import
safetensors.torch
import
torch
import
torch
from
torch
import
nn
from
torch
import
nn
from
vllm.model_executor.layers.vocab_parallel_embedding
import
(
from
vllm.model_executor.layers.vocab_parallel_embedding
import
VocabParallelEmbedding
ParallelLMHead
,
VocabParallelEmbedding
,
)
from
sglang.srt.layers.linear
import
(
from
sglang.srt.layers.linear
import
(
ColumnParallelLinear
,
ColumnParallelLinear
,
...
@@ -38,7 +31,6 @@ from sglang.srt.layers.linear import (
...
@@ -38,7 +31,6 @@ from sglang.srt.layers.linear import (
QKVParallelLinear
,
QKVParallelLinear
,
RowParallelLinear
,
RowParallelLinear
,
)
)
from
sglang.srt.model_executor.forward_batch_info
import
ForwardBatch
,
ForwardMode
from
sglang.srt.model_loader.loader
import
DefaultModelLoader
from
sglang.srt.model_loader.loader
import
DefaultModelLoader
...
...
python/sglang/srt/models/baichuan.py
View file @
033c715b
...
@@ -24,11 +24,6 @@ from typing import Iterable, Optional, Tuple
...
@@ -24,11 +24,6 @@ from typing import Iterable, Optional, Tuple
import
torch
import
torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
from
vllm.model_executor.layers.linear
import
(
MergedColumnParallelLinear
,
QKVParallelLinear
,
RowParallelLinear
,
)
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
from
sglang.srt.distributed
import
(
from
sglang.srt.distributed
import
(
...
@@ -37,6 +32,11 @@ from sglang.srt.distributed import (
...
@@ -37,6 +32,11 @@ from sglang.srt.distributed import (
)
)
from
sglang.srt.layers.activation
import
SiluAndMul
from
sglang.srt.layers.activation
import
SiluAndMul
from
sglang.srt.layers.layernorm
import
RMSNorm
from
sglang.srt.layers.layernorm
import
RMSNorm
from
sglang.srt.layers.linear
import
(
MergedColumnParallelLinear
,
QKVParallelLinear
,
RowParallelLinear
,
)
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.quantization.base_config
import
QuantizationConfig
from
sglang.srt.layers.quantization.base_config
import
QuantizationConfig
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.layers.radix_attention
import
RadixAttention
...
...
python/sglang/srt/models/gpt2.py
View file @
033c715b
...
@@ -22,10 +22,9 @@ from typing import Iterable, List, Optional, Tuple
...
@@ -22,10 +22,9 @@ from typing import Iterable, List, Optional, Tuple
import
torch
import
torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
GPT2Config
from
transformers
import
GPT2Config
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.vocab_parallel_embedding
import
VocabParallelEmbedding
from
sglang.srt.distributed.parallel_state
import
get_tensor_model_parallel_world_size
from
sglang.srt.distributed.parallel_state
import
get_tensor_model_parallel_world_size
from
sglang.srt.layers.activation
import
get_act_fn
# from sglang.srt.layers.activation import get_act_fn
# from sglang.srt.layers.activation import get_act_fn
from
sglang.srt.layers.linear
import
(
from
sglang.srt.layers.linear
import
(
...
...
python/sglang/srt/models/minicpm3.py
View file @
033c715b
...
@@ -19,17 +19,17 @@ from typing import Any, Dict, Iterable, Optional, Tuple
...
@@ -19,17 +19,17 @@ from typing import Any, Dict, Iterable, Optional, Tuple
import
torch
import
torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
MergedColumnParallelLinear
,
ReplicatedLinear
,
RowParallelLinear
,
)
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
from
sglang.srt.distributed
import
get_tensor_model_parallel_world_size
from
sglang.srt.distributed
import
get_tensor_model_parallel_world_size
from
sglang.srt.layers.activation
import
SiluAndMul
from
sglang.srt.layers.activation
import
SiluAndMul
from
sglang.srt.layers.layernorm
import
RMSNorm
from
sglang.srt.layers.layernorm
import
RMSNorm
from
sglang.srt.layers.linear
import
(
ColumnParallelLinear
,
MergedColumnParallelLinear
,
ReplicatedLinear
,
RowParallelLinear
,
)
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.quantization.base_config
import
QuantizationConfig
from
sglang.srt.layers.quantization.base_config
import
QuantizationConfig
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.layers.radix_attention
import
RadixAttention
...
...
python/sglang/srt/models/olmo2.py
View file @
033c715b
...
@@ -22,7 +22,6 @@ import torch
...
@@ -22,7 +22,6 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
from
sglang.srt.distributed
import
(
from
sglang.srt.distributed
import
(
get_tensor_model_parallel_rank
,
get_tensor_model_parallel_rank
,
...
@@ -45,6 +44,7 @@ from sglang.srt.layers.vocab_parallel_embedding import (
...
@@ -45,6 +44,7 @@ from sglang.srt.layers.vocab_parallel_embedding import (
VocabParallelEmbedding
,
VocabParallelEmbedding
,
)
)
from
sglang.srt.model_executor.forward_batch_info
import
ForwardBatch
from
sglang.srt.model_executor.forward_batch_info
import
ForwardBatch
from
sglang.srt.model_loader.weight_utils
import
default_weight_loader
from
sglang.srt.utils
import
make_layers
from
sglang.srt.utils
import
make_layers
...
...
python/sglang/srt/models/olmoe.py
View file @
033c715b
...
@@ -23,12 +23,6 @@ import torch
...
@@ -23,12 +23,6 @@ import torch
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
from
torch
import
nn
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
from
vllm.model_executor.layers.linear
import
(
MergedColumnParallelLinear
,
QKVParallelLinear
,
ReplicatedLinear
,
RowParallelLinear
,
)
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
from
sglang.srt.distributed
import
(
from
sglang.srt.distributed
import
(
...
@@ -37,6 +31,11 @@ from sglang.srt.distributed import (
...
@@ -37,6 +31,11 @@ from sglang.srt.distributed import (
)
)
from
sglang.srt.layers.activation
import
SiluAndMul
from
sglang.srt.layers.activation
import
SiluAndMul
from
sglang.srt.layers.layernorm
import
RMSNorm
from
sglang.srt.layers.layernorm
import
RMSNorm
from
sglang.srt.layers.linear
import
(
QKVParallelLinear
,
ReplicatedLinear
,
RowParallelLinear
,
)
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
,
LogitsProcessorOutput
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
,
LogitsProcessorOutput
from
sglang.srt.layers.moe.fused_moe_triton
import
FusedMoE
from
sglang.srt.layers.moe.fused_moe_triton
import
FusedMoE
from
sglang.srt.layers.quantization.base_config
import
QuantizationConfig
from
sglang.srt.layers.quantization.base_config
import
QuantizationConfig
...
...
python/sglang/srt/models/qwen2_vl.py
View file @
033c715b
...
@@ -22,6 +22,7 @@
...
@@ -22,6 +22,7 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
"""Inference-only Qwen2-VL model compatible with HuggingFace weights."""
"""Inference-only Qwen2-VL model compatible with HuggingFace weights."""
import
logging
from
functools
import
lru_cache
,
partial
from
functools
import
lru_cache
,
partial
from
typing
import
Iterable
,
List
,
Optional
,
Tuple
,
Type
,
TypedDict
from
typing
import
Iterable
,
List
,
Optional
,
Tuple
,
Type
,
TypedDict
...
@@ -30,7 +31,6 @@ import torch
...
@@ -30,7 +31,6 @@ import torch
import
torch.nn
as
nn
import
torch.nn
as
nn
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
from
einops
import
rearrange
,
repeat
from
einops
import
rearrange
,
repeat
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.activation
import
QuickGELU
from
vllm.model_executor.layers.activation
import
QuickGELU
from
sglang.srt.configs
import
Qwen2VLConfig
,
Qwen2VLVisionConfig
from
sglang.srt.configs
import
Qwen2VLConfig
,
Qwen2VLVisionConfig
...
@@ -50,7 +50,7 @@ from sglang.srt.model_executor.forward_batch_info import ForwardBatch
...
@@ -50,7 +50,7 @@ from sglang.srt.model_executor.forward_batch_info import ForwardBatch
from
sglang.srt.model_loader.weight_utils
import
default_weight_loader
from
sglang.srt.model_loader.weight_utils
import
default_weight_loader
from
sglang.srt.models.qwen2
import
Qwen2Model
from
sglang.srt.models.qwen2
import
Qwen2Model
logger
=
init_l
ogger
(
__name__
)
logger
=
logging
.
getL
ogger
(
__name__
)
# === Vision Inputs === #
# === Vision Inputs === #
...
...
python/sglang/srt/models/xverse.py
View file @
033c715b
...
@@ -21,16 +21,16 @@ from typing import Any, Dict, Iterable, Optional, Tuple
...
@@ -21,16 +21,16 @@ from typing import Any, Dict, Iterable, Optional, Tuple
import
torch
import
torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
LlamaConfig
from
transformers
import
LlamaConfig
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
from
sglang.srt.distributed
import
get_tensor_model_parallel_world_size
from
sglang.srt.layers.activation
import
SiluAndMul
from
sglang.srt.layers.layernorm
import
RMSNorm
from
sglang.srt.layers.linear
import
(
MergedColumnParallelLinear
,
MergedColumnParallelLinear
,
QKVParallelLinear
,
QKVParallelLinear
,
RowParallelLinear
,
RowParallelLinear
,
)
)
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
from
sglang.srt.distributed
import
get_tensor_model_parallel_world_size
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.quantization.base_config
import
QuantizationConfig
from
sglang.srt.layers.quantization.base_config
import
QuantizationConfig
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.layers.radix_attention
import
RadixAttention
...
...
python/sglang/srt/models/xverse_moe.py
View file @
033c715b
...
@@ -18,14 +18,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
...
@@ -18,14 +18,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
import
torch
import
torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.linear
import
(
MergedColumnParallelLinear
,
QKVParallelLinear
,
ReplicatedLinear
,
RowParallelLinear
,
)
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
from
sglang.srt.distributed
import
(
from
sglang.srt.distributed
import
(
...
@@ -33,6 +25,14 @@ from sglang.srt.distributed import (
...
@@ -33,6 +25,14 @@ from sglang.srt.distributed import (
get_tensor_model_parallel_world_size
,
get_tensor_model_parallel_world_size
,
tensor_model_parallel_all_reduce
,
tensor_model_parallel_all_reduce
,
)
)
from
sglang.srt.layers.activation
import
SiluAndMul
from
sglang.srt.layers.layernorm
import
RMSNorm
from
sglang.srt.layers.linear
import
(
MergedColumnParallelLinear
,
QKVParallelLinear
,
ReplicatedLinear
,
RowParallelLinear
,
)
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.moe.fused_moe_triton
import
fused_moe
from
sglang.srt.layers.moe.fused_moe_triton
import
fused_moe
from
sglang.srt.layers.quantization.base_config
import
QuantizationConfig
from
sglang.srt.layers.quantization.base_config
import
QuantizationConfig
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment