sglang · Commits · 5dc54f1a

Commit 5dc54f1a (unverified)
Authored Jan 17, 2025 by Yineng Zhang; committed by GitHub on Jan 17, 2025
feat: remove vllm distributed (#2907)
Co-authored-by: Zhangyi <1109276519@qq.com>
Parent: f3e9b489
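Every hunk in this commit applies the same mechanical migration: tensor-parallel helpers that were previously imported from vllm.distributed now come from sglang's in-tree sglang.srt.distributed package, which exposes the same names, so call sites stay untouched. A minimal sketch of the pattern (the import names are taken from the diffs below; the two calls at the end are an illustrative assumption, not code from this commit):

    # Before (removed): helpers came from vLLM's distributed package.
    # from vllm.distributed import (
    #     get_tensor_model_parallel_rank,
    #     get_tensor_model_parallel_world_size,
    #     tensor_model_parallel_all_reduce,
    # )

    # After (added): the same names now come from sglang itself.
    from sglang.srt.distributed import (
        get_tensor_model_parallel_rank,
        get_tensor_model_parallel_world_size,
        tensor_model_parallel_all_reduce,
    )

    # Illustrative use: each rank asks for its position in the
    # tensor-parallel group before sizing its weight shards.
    tp_rank = get_tensor_model_parallel_rank()
    tp_size = get_tensor_model_parallel_world_size()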
Changes: 45 files in total; this page shows 20 changed files with 39 additions and 38 deletions (+39 −38). The remaining files are on pages 2 and 3 of the diff listing.
python/sglang/srt/models/gemma.py          +1 −1
python/sglang/srt/models/gemma2.py         +1 −1
python/sglang/srt/models/gpt2.py           +2 −1
python/sglang/srt/models/gpt_bigcode.py    +1 −1
python/sglang/srt/models/granite.py        +1 −1
python/sglang/srt/models/grok.py           +3 −3
python/sglang/srt/models/internlm2.py      +1 −1
python/sglang/srt/models/llama.py          +4 −4
python/sglang/srt/models/minicpm.py        +1 −1
python/sglang/srt/models/minicpm3.py       +1 −1
python/sglang/srt/models/mixtral.py        +3 −3
python/sglang/srt/models/mixtral_quant.py  +3 −3
python/sglang/srt/models/mllama.py         +2 −2
python/sglang/srt/models/olmo.py           +1 −1
python/sglang/srt/models/olmo2.py          +4 −4
python/sglang/srt/models/olmoe.py          +4 −4
python/sglang/srt/models/phi3_small.py     +1 −1
python/sglang/srt/models/qwen.py           +1 −1
python/sglang/srt/models/qwen2.py          +1 −1
python/sglang/srt/models/qwen2_moe.py      +3 −3
python/sglang/srt/models/gemma.py
@@ -21,9 +21,9 @@ from typing import Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.rotary_embedding import get_rope
 
+from sglang.srt.distributed import get_tensor_model_parallel_world_size
 from sglang.srt.layers.activation import GeluAndMul
 from sglang.srt.layers.layernorm import RMSNorm
 from sglang.srt.layers.linear import (
 ...
python/sglang/srt/models/gemma2.py
@@ -20,8 +20,8 @@ from typing import Iterable, Optional, Set, Tuple, Union
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.distributed import get_tensor_model_parallel_world_size
 
+from sglang.srt.distributed import get_tensor_model_parallel_world_size
 from sglang.srt.layers.activation import GeluAndMul
 from sglang.srt.layers.layernorm import GemmaRMSNorm
 from sglang.srt.layers.linear import (
 ...
python/sglang/srt/models/gpt2.py
@@ -22,10 +22,11 @@ from typing import Iterable, List, Optional, Tuple
 import torch
 from torch import nn
 from transformers import GPT2Config
-from vllm.distributed.parallel_state import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.activation import get_act_fn
 from vllm.model_executor.layers.vocab_parallel_embedding import VocabParallelEmbedding
 
+from sglang.srt.distributed.parallel_state import get_tensor_model_parallel_world_size
+# from sglang.srt.layers.activation import get_act_fn
 from sglang.srt.layers.linear import (
     ColumnParallelLinear,
 ...
python/sglang/srt/models/gpt_bigcode.py
@@ -21,8 +21,8 @@ from typing import Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import GPTBigCodeConfig
-from vllm.distributed import get_tensor_model_parallel_world_size
 
+from sglang.srt.distributed import get_tensor_model_parallel_world_size
 from sglang.srt.layers.activation import get_act_fn
 from sglang.srt.layers.linear import (
     ColumnParallelLinear,
 ...
python/sglang/srt/models/granite.py
@@ -22,9 +22,9 @@ from typing import Any, Dict, Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import GraniteConfig
-from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.rotary_embedding import get_rope
 
+from sglang.srt.distributed import get_tensor_model_parallel_world_size
 from sglang.srt.layers.activation import SiluAndMul
 from sglang.srt.layers.layernorm import RMSNorm
 from sglang.srt.layers.linear import (
 ...
python/sglang/srt/models/grok.py
@@ -22,12 +22,12 @@ import torch
 import torch.nn.functional as F
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.distributed import (
+from vllm.model_executor.layers.rotary_embedding import get_rope
+
+from sglang.srt.distributed import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
 )
-from vllm.model_executor.layers.rotary_embedding import get_rope
-
 from sglang.srt.layers.activation import GeluAndMul
 from sglang.srt.layers.layernorm import RMSNorm
 from sglang.srt.layers.linear import (
 ...
python/sglang/srt/models/internlm2.py
@@ -19,9 +19,9 @@ from typing import Any, Dict, Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.rotary_embedding import get_rope
 
+from sglang.srt.distributed import get_tensor_model_parallel_world_size
 from sglang.srt.layers.activation import SiluAndMul
 from sglang.srt.layers.layernorm import RMSNorm
 from sglang.srt.layers.linear import (
 ...
python/sglang/srt/models/llama.py
@@ -22,13 +22,13 @@ from typing import Any, Dict, Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import LlamaConfig
-from vllm.distributed import (
-    get_tensor_model_parallel_rank,
-    get_tensor_model_parallel_world_size,
-)
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.model_loader.weight_utils import kv_cache_scales_loader
 
+from sglang.srt.distributed import (
+    get_tensor_model_parallel_rank,
+    get_tensor_model_parallel_world_size,
+)
 from sglang.srt.layers.activation import SiluAndMul
 from sglang.srt.layers.layernorm import RMSNorm
 from sglang.srt.layers.linear import (
 ...
python/sglang/srt/models/minicpm.py
@@ -18,9 +18,9 @@ from typing import Any, Dict, Iterable, Optional, Tuple
 import torch
 from torch import nn
-from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.rotary_embedding import get_rope
 
+from sglang.srt.distributed import get_tensor_model_parallel_world_size
 from sglang.srt.layers.activation import SiluAndMul
 from sglang.srt.layers.layernorm import RMSNorm
 from sglang.srt.layers.linear import (
 ...
python/sglang/srt/models/minicpm3.py
@@ -19,7 +19,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.linear import (
     ColumnParallelLinear,
     MergedColumnParallelLinear,
 ...
@@ -28,6 +27,7 @@ from vllm.model_executor.layers.linear import (
 )
 from vllm.model_executor.layers.rotary_embedding import get_rope
 
+from sglang.srt.distributed import get_tensor_model_parallel_world_size
 from sglang.srt.layers.activation import SiluAndMul
 from sglang.srt.layers.layernorm import RMSNorm
 from sglang.srt.layers.logits_processor import LogitsProcessor
 ...
python/sglang/srt/models/mixtral.py
@@ -21,12 +21,12 @@ from typing import Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import MixtralConfig
-from vllm.distributed import (
+from vllm.model_executor.layers.rotary_embedding import get_rope
+
+from sglang.srt.distributed import (
     get_tensor_model_parallel_world_size,
     tensor_model_parallel_all_reduce,
 )
-from vllm.model_executor.layers.rotary_embedding import get_rope
-
 from sglang.srt.layers.layernorm import RMSNorm
 from sglang.srt.layers.linear import (
     QKVParallelLinear,
 ...
python/sglang/srt/models/mixtral_quant.py
@@ -23,13 +23,13 @@ import torch
 import torch.nn.functional as F
 from torch import nn
 from transformers import MixtralConfig
-from vllm.distributed import (
+from vllm.model_executor.layers.rotary_embedding import get_rope
+
+from sglang.srt.distributed import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
     tensor_model_parallel_all_reduce,
 )
-from vllm.model_executor.layers.rotary_embedding import get_rope
-
 from sglang.srt.layers.layernorm import RMSNorm
 from sglang.srt.layers.linear import (
     QKVParallelLinear,
 ...
python/sglang/srt/models/mllama.py
@@ -8,14 +8,14 @@ import torch
 import torch.nn.functional as F
 import torch.utils.checkpoint
 import transformers.models.mllama.configuration_mllama as config_mllama
-import vllm.distributed.parallel_state as ps
 from torch import nn
 from transformers.modeling_outputs import BaseModelOutput, CausalLMOutputWithPast
 from transformers.models.mllama.modeling_mllama import (
     _prepare_aspect_ratio_attention_mask,
 )
-from vllm.distributed import get_tensor_model_parallel_world_size
 
+import sglang.srt.distributed.parallel_state as ps
+from sglang.srt.distributed import get_tensor_model_parallel_world_size
 from sglang.srt.layers.activation import get_act_fn
 from sglang.srt.layers.layernorm import RMSNorm
 from sglang.srt.layers.linear import (
 ...
python/sglang/srt/models/olmo.py
@@ -20,9 +20,9 @@ from typing import Iterable, List, Optional, Tuple
 import torch
 from torch import nn
 from transformers import OlmoConfig
-from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.rotary_embedding import get_rope
 
+from sglang.srt.distributed import get_tensor_model_parallel_world_size
 from sglang.srt.layers.activation import SiluAndMul
 from sglang.srt.layers.linear import (
     MergedColumnParallelLinear,
 ...
python/sglang/srt/models/olmo2.py
@@ -21,15 +21,15 @@ from typing import Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.distributed import (
+from vllm.model_executor.layers.rotary_embedding import get_rope
+from vllm.model_executor.model_loader.weight_utils import default_weight_loader
+
+from sglang.srt.distributed import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
     split_tensor_along_last_dim,
     tensor_model_parallel_all_gather,
 )
-from vllm.model_executor.layers.rotary_embedding import get_rope
-from vllm.model_executor.model_loader.weight_utils import default_weight_loader
-
 from sglang.srt.layers.activation import SiluAndMul
 from sglang.srt.layers.layernorm import RMSNorm
 from sglang.srt.layers.linear import (
 ...
python/sglang/srt/models/olmoe.py
@@ -23,10 +23,6 @@ import torch
 import torch.nn.functional as F
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.distributed import (
-    get_tensor_model_parallel_world_size,
-    tensor_model_parallel_all_reduce,
-)
 from vllm.model_executor.layers.linear import (
     MergedColumnParallelLinear,
     QKVParallelLinear,
 ...
@@ -35,6 +31,10 @@ from vllm.model_executor.layers.linear import (
 )
 from vllm.model_executor.layers.rotary_embedding import get_rope
 
+from sglang.srt.distributed import (
+    get_tensor_model_parallel_world_size,
+    tensor_model_parallel_all_reduce,
+)
 from sglang.srt.layers.activation import SiluAndMul
 from sglang.srt.layers.layernorm import RMSNorm
 from sglang.srt.layers.logits_processor import LogitsProcessor, LogitsProcessorOutput
 ...
python/sglang/srt/models/phi3_small.py
@@ -5,9 +5,9 @@ import torch
 from torch import nn
 from transformers import Phi3Config
 from transformers.configuration_utils import PretrainedConfig
-from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.rotary_embedding import get_rope
 
+from sglang.srt.distributed import get_tensor_model_parallel_world_size
 from sglang.srt.layers.linear import (
     MergedColumnParallelLinear,
     QKVParallelLinear,
 ...
python/sglang/srt/models/qwen.py
@@ -20,9 +20,9 @@ from typing import Any, Dict, Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.rotary_embedding import get_rope
 
+from sglang.srt.distributed import get_tensor_model_parallel_world_size
 from sglang.srt.layers.activation import SiluAndMul
 from sglang.srt.layers.layernorm import RMSNorm
 from sglang.srt.layers.linear import (
 ...
python/sglang/srt/models/qwen2.py
@@ -20,9 +20,9 @@ from typing import Any, Dict, Iterable, Optional, Tuple
 import torch
 from torch import nn
-from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.rotary_embedding import get_rope
 
+from sglang.srt.distributed import get_tensor_model_parallel_world_size
 from sglang.srt.layers.activation import SiluAndMul
 from sglang.srt.layers.layernorm import RMSNorm
 from sglang.srt.layers.linear import (
 ...
python/sglang/srt/models/qwen2_moe.py
@@ -22,12 +22,12 @@ import torch
 import torch.nn.functional as F
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.distributed import (
+from vllm.model_executor.layers.rotary_embedding import get_rope
+
+from sglang.srt.distributed import (
     get_tensor_model_parallel_world_size,
     tensor_model_parallel_all_reduce,
 )
-from vllm.model_executor.layers.rotary_embedding import get_rope
-
 from sglang.srt.layers.activation import SiluAndMul
 from sglang.srt.layers.layernorm import RMSNorm
 from sglang.srt.layers.linear import (
 ...
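The hunks above change only import paths; every call site keeps using the same helpers. For context, here is a single-process sketch of the column-parallel sharding arithmetic that get_tensor_model_parallel_world_size() feeds inside the parallel linear layers (a hypothetical standalone example, not code from this commit; tp_size stands in for the helper's return value and the shapes are arbitrary):

    import torch

    tp_size = 4  # stand-in for get_tensor_model_parallel_world_size()
    out_features, in_features = 4096, 11008
    assert out_features % tp_size == 0
    shard_out = out_features // tp_size  # weight rows owned by one rank

    x = torch.randn(2, in_features)                # a batch of activations
    w_shard = torch.randn(shard_out, in_features)  # this rank's weight slice
    y_partial = x @ w_shard.t()                    # partial output: [2, shard_out]
    # The full output is the concatenation of every rank's y_partial; a
    # following row-parallel projection would instead sum partial products
    # across ranks with tensor_model_parallel_all_reduce(y_partial).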