change / sglang · Commits · 5dc54f1a

Commit 5dc54f1a (unverified), authored Jan 17, 2025 by Yineng Zhang, committed by GitHub on Jan 17, 2025
feat: remove vllm distributed (#2907)

Co-authored-by: Zhangyi <1109276519@qq.com>

Parent: f3e9b489
Showing 5 of the commit's 45 changed files, with 12 additions and 12 deletions (+12 -12):
python/sglang/srt/models/qwen2_vl.py            +2 -2
python/sglang/srt/models/stablelm.py            +1 -1
python/sglang/srt/models/torch_native_llama.py  +3 -3
python/sglang/srt/models/xverse.py              +1 -1
python/sglang/srt/models/xverse_moe.py          +5 -5
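Every file below follows the same pattern: tensor-parallel helper imports move from vllm.distributed to sglang's own sglang.srt.distributed package, and nothing else changes, so call sites keep working. A minimal sketch of why the swap is drop-in, assuming (as the diffs imply) that sglang.srt.distributed re-exports the same names vllm.distributed did; `shard_width` and the `hidden_size` value are made-up illustrations, not code from the commit:

```python
# Sketch only: calling the helper requires an initialized
# tensor-parallel process group, and the values are illustrative.

# Before this commit:
#   from vllm.distributed import get_tensor_model_parallel_world_size
# After it, the same name resolves inside sglang:
from sglang.srt.distributed import get_tensor_model_parallel_world_size


def shard_width(hidden_size: int = 4096) -> int:
    """Per-rank width of a column-parallel weight."""
    tp_size = get_tensor_model_parallel_world_size()
    assert hidden_size % tp_size == 0, "hidden size must divide evenly"
    return hidden_size // tp_size
```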
python/sglang/srt/models/qwen2_vl.py

@@ -30,12 +30,12 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from einops import rearrange, repeat
-from vllm.distributed import parallel_state
-from vllm.distributed import utils as dist_utils
 from vllm.logger import init_logger
 from vllm.model_executor.layers.activation import QuickGELU
 
 from sglang.srt.configs import Qwen2VLConfig, Qwen2VLVisionConfig
+from sglang.srt.distributed import parallel_state
+from sglang.srt.distributed import utils as dist_utils
 from sglang.srt.hf_transformers_utils import get_processor
 from sglang.srt.layers.attention.triton_ops.prefill_attention import (
     context_attention_fwd,
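qwen2_vl.py is the only file on this page that imports the whole parallel_state module plus the utils helpers rather than individual functions. A hedged sketch of the typical use in a vision-attention module, assuming the vendored utils keep vLLM's `divide` helper (which asserts even divisibility); `heads_on_this_rank` and the head count are hypothetical:

```python
# Illustrative only: the function name and num_heads value are
# assumptions, not names from the diff.
from sglang.srt.distributed import parallel_state
from sglang.srt.distributed import utils as dist_utils


def heads_on_this_rank(num_heads: int = 16) -> int:
    tp_size = parallel_state.get_tensor_model_parallel_world_size()
    # divide() raises if num_heads is not a multiple of tp_size,
    # surfacing a bad --tp setting at load time instead of mid-forward.
    return dist_utils.divide(num_heads, tp_size)
```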
python/sglang/srt/models/stablelm.py

@@ -24,9 +24,9 @@ from typing import Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.rotary_embedding import get_rope
 
+from sglang.srt.distributed import get_tensor_model_parallel_world_size
 from sglang.srt.layers.activation import SiluAndMul
 from sglang.srt.layers.linear import (
     MergedColumnParallelLinear,
python/sglang/srt/models/torch_native_llama.py

@@ -47,12 +47,12 @@ import torch
 from torch import nn
 from torch.nn.parameter import Parameter
 from transformers import LlamaConfig
-from vllm.distributed import (
+from vllm.model_executor.layers.rotary_embedding import get_rope
+
+from sglang.srt.distributed import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
 )
-from vllm.model_executor.layers.rotary_embedding import get_rope
-
 from sglang.srt.layers.activation import SiluAndMul
 from sglang.srt.layers.layernorm import RMSNorm
 from sglang.srt.layers.logits_processor import LogitsProcessor, LogitsProcessorOutput
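torch_native_llama.py pulls both the rank and the world size, the pair needed to decide which slice of a sharded weight the current process owns. A sketch under the assumption that the vendored helpers keep vLLM's semantics; `shard_rows` is a hypothetical illustration, not part of the file:

```python
# Sketch: assumes vLLM semantics for the vendored helpers; shard_rows
# is a hypothetical example of row-wise weight sharding.
import torch

from sglang.srt.distributed import (
    get_tensor_model_parallel_rank,
    get_tensor_model_parallel_world_size,
)


def shard_rows(weight: torch.Tensor) -> torch.Tensor:
    """Return the contiguous row slice of `weight` owned by this TP rank."""
    tp_rank = get_tensor_model_parallel_rank()
    tp_size = get_tensor_model_parallel_world_size()
    rows_per_rank = weight.shape[0] // tp_size
    return weight[tp_rank * rows_per_rank : (tp_rank + 1) * rows_per_rank]
```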
python/sglang/srt/models/xverse.py

@@ -21,7 +21,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import LlamaConfig
-from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
@@ -31,6 +30,7 @@ from vllm.model_executor.layers.linear import (
 )
 from vllm.model_executor.layers.rotary_embedding import get_rope
 
+from sglang.srt.distributed import get_tensor_model_parallel_world_size
 from sglang.srt.layers.logits_processor import LogitsProcessor
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
 from sglang.srt.layers.radix_attention import RadixAttention
python/sglang/srt/models/xverse_moe.py

@@ -18,11 +18,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.distributed import (
-    get_tensor_model_parallel_rank,
-    get_tensor_model_parallel_world_size,
-    tensor_model_parallel_all_reduce,
-)
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.linear import (
@@ -33,6 +28,11 @@ from vllm.model_executor.layers.linear import (
 )
 from vllm.model_executor.layers.rotary_embedding import get_rope
 
+from sglang.srt.distributed import (
+    get_tensor_model_parallel_rank,
+    get_tensor_model_parallel_world_size,
+    tensor_model_parallel_all_reduce,
+)
 from sglang.srt.layers.logits_processor import LogitsProcessor
 from sglang.srt.layers.moe.fused_moe_triton import fused_moe
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
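xverse_moe.py is the only file on this page that also needs tensor_model_parallel_all_reduce: with expert weights sharded across TP ranks, each rank computes a partial output that must be summed. An illustrative epilogue, assuming the vendored all-reduce matches vLLM's signature; `moe_epilogue` is a hypothetical name:

```python
# Illustrative: moe_epilogue is a made-up wrapper; the all-reduce call
# itself shows how TP layers combine per-rank partial results.
import torch

from sglang.srt.distributed import tensor_model_parallel_all_reduce


def moe_epilogue(partial_out: torch.Tensor) -> torch.Tensor:
    # With expert weights sharded, each rank holds a partial sum of the
    # hidden states; summing across the TP group yields the full output.
    return tensor_model_parallel_all_reduce(partial_out)
```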