Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
cdcbde5f
Unverified
Commit
cdcbde5f
authored
Jul 29, 2024
by
Liangsheng Yin
Committed by
GitHub
Jul 29, 2024
Browse files
Code structure refactor (#807)
parent
21e22b9e
Changes
41
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
24 additions
and
24 deletions
+24
-24
python/sglang/srt/models/dbrx.py
python/sglang/srt/models/dbrx.py
+1
-1
python/sglang/srt/models/deepseek.py
python/sglang/srt/models/deepseek.py
+1
-1
python/sglang/srt/models/deepseek_v2.py
python/sglang/srt/models/deepseek_v2.py
+1
-1
python/sglang/srt/models/gemma.py
python/sglang/srt/models/gemma.py
+1
-1
python/sglang/srt/models/gemma2.py
python/sglang/srt/models/gemma2.py
+1
-1
python/sglang/srt/models/gpt_bigcode.py
python/sglang/srt/models/gpt_bigcode.py
+1
-1
python/sglang/srt/models/grok.py
python/sglang/srt/models/grok.py
+1
-1
python/sglang/srt/models/internlm2.py
python/sglang/srt/models/internlm2.py
+1
-1
python/sglang/srt/models/llama2.py
python/sglang/srt/models/llama2.py
+1
-1
python/sglang/srt/models/llama_classification.py
python/sglang/srt/models/llama_classification.py
+1
-1
python/sglang/srt/models/llava.py
python/sglang/srt/models/llava.py
+2
-2
python/sglang/srt/models/llavavid.py
python/sglang/srt/models/llavavid.py
+2
-2
python/sglang/srt/models/minicpm.py
python/sglang/srt/models/minicpm.py
+1
-1
python/sglang/srt/models/mixtral.py
python/sglang/srt/models/mixtral.py
+1
-1
python/sglang/srt/models/mixtral_quant.py
python/sglang/srt/models/mixtral_quant.py
+1
-1
python/sglang/srt/models/qwen.py
python/sglang/srt/models/qwen.py
+1
-1
python/sglang/srt/models/qwen2.py
python/sglang/srt/models/qwen2.py
+1
-1
python/sglang/srt/models/qwen2_moe.py
python/sglang/srt/models/qwen2_moe.py
+1
-1
python/sglang/srt/models/stablelm.py
python/sglang/srt/models/stablelm.py
+1
-1
python/sglang/srt/server.py
python/sglang/srt/server.py
+3
-3
No files found.
python/sglang/srt/models/dbrx.py
View file @
cdcbde5f
...
@@ -45,7 +45,7 @@ from vllm.transformers_utils.configs.dbrx import DbrxConfig
...
@@ -45,7 +45,7 @@ from vllm.transformers_utils.configs.dbrx import DbrxConfig
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.m
anagers.controlle
r.model_runner
import
InputMetadata
from
sglang.srt.m
odel_executo
r.model_runner
import
InputMetadata
class
DbrxRouter
(
nn
.
Module
):
class
DbrxRouter
(
nn
.
Module
):
...
...
python/sglang/srt/models/deepseek.py
View file @
cdcbde5f
...
@@ -46,7 +46,7 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
...
@@ -46,7 +46,7 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.
controller.infer
_batch
import
InputMetadata
from
sglang.srt.managers.
schedule
_batch
import
InputMetadata
class
DeepseekMLP
(
nn
.
Module
):
class
DeepseekMLP
(
nn
.
Module
):
...
...
python/sglang/srt/models/deepseek_v2.py
View file @
cdcbde5f
...
@@ -45,7 +45,7 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
...
@@ -45,7 +45,7 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.m
anagers.controlle
r.model_runner
import
InputMetadata
from
sglang.srt.m
odel_executo
r.model_runner
import
InputMetadata
class
DeepseekV2MLP
(
nn
.
Module
):
class
DeepseekV2MLP
(
nn
.
Module
):
...
...
python/sglang/srt/models/gemma.py
View file @
cdcbde5f
...
@@ -37,7 +37,7 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
...
@@ -37,7 +37,7 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.m
anagers.controlle
r.model_runner
import
InputMetadata
from
sglang.srt.m
odel_executo
r.model_runner
import
InputMetadata
class
GemmaMLP
(
nn
.
Module
):
class
GemmaMLP
(
nn
.
Module
):
...
...
python/sglang/srt/models/gemma2.py
View file @
cdcbde5f
...
@@ -42,7 +42,7 @@ from vllm.model_executor.sampling_metadata import SamplingMetadata
...
@@ -42,7 +42,7 @@ from vllm.model_executor.sampling_metadata import SamplingMetadata
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.m
anagers.controlle
r.model_runner
import
InputMetadata
from
sglang.srt.m
odel_executo
r.model_runner
import
InputMetadata
class
GemmaRMSNorm
(
CustomOp
):
class
GemmaRMSNorm
(
CustomOp
):
...
...
python/sglang/srt/models/gpt_bigcode.py
View file @
cdcbde5f
...
@@ -35,7 +35,7 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
...
@@ -35,7 +35,7 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.
controller.infer
_batch
import
InputMetadata
from
sglang.srt.managers.
schedule
_batch
import
InputMetadata
class
GPTBigCodeAttention
(
nn
.
Module
):
class
GPTBigCodeAttention
(
nn
.
Module
):
...
...
python/sglang/srt/models/grok.py
View file @
cdcbde5f
...
@@ -52,7 +52,7 @@ from vllm.utils import print_warning_once
...
@@ -52,7 +52,7 @@ from vllm.utils import print_warning_once
from
sglang.srt.layers.fused_moe
import
fused_moe
from
sglang.srt.layers.fused_moe
import
fused_moe
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.m
anagers.controlle
r.model_runner
import
InputMetadata
from
sglang.srt.m
odel_executo
r.model_runner
import
InputMetadata
use_fused
=
True
use_fused
=
True
...
...
python/sglang/srt/models/internlm2.py
View file @
cdcbde5f
...
@@ -40,7 +40,7 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
...
@@ -40,7 +40,7 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.m
anagers.controlle
r.model_runner
import
InputMetadata
from
sglang.srt.m
odel_executo
r.model_runner
import
InputMetadata
class
InternLM2MLP
(
nn
.
Module
):
class
InternLM2MLP
(
nn
.
Module
):
...
...
python/sglang/srt/models/llama2.py
View file @
cdcbde5f
...
@@ -36,7 +36,7 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
...
@@ -36,7 +36,7 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.m
anagers.controlle
r.model_runner
import
InputMetadata
from
sglang.srt.m
odel_executo
r.model_runner
import
InputMetadata
MergedColumnParallelLinear
=
None
MergedColumnParallelLinear
=
None
QKVParallelLinear
=
None
QKVParallelLinear
=
None
...
...
python/sglang/srt/models/llama_classification.py
View file @
cdcbde5f
...
@@ -25,7 +25,7 @@ from vllm.model_executor.layers.quantization.base_config import QuantizationConf
...
@@ -25,7 +25,7 @@ from vllm.model_executor.layers.quantization.base_config import QuantizationConf
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
from
sglang.srt.layers.logits_processor
import
LogitProcessorOutput
from
sglang.srt.layers.logits_processor
import
LogitProcessorOutput
from
sglang.srt.m
anagers.controlle
r.model_runner
import
InputMetadata
from
sglang.srt.m
odel_executo
r.model_runner
import
InputMetadata
from
sglang.srt.models.llama2
import
LlamaModel
from
sglang.srt.models.llama2
import
LlamaModel
...
...
python/sglang/srt/models/llava.py
View file @
cdcbde5f
...
@@ -32,13 +32,13 @@ from vllm.config import CacheConfig
...
@@ -32,13 +32,13 @@ from vllm.config import CacheConfig
from
vllm.model_executor.layers.quantization.base_config
import
QuantizationConfig
from
vllm.model_executor.layers.quantization.base_config
import
QuantizationConfig
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
from
sglang.srt.managers.controller.infer_batch
import
ForwardMode
from
sglang.srt.managers.schedule_batch
import
ForwardMode
from
sglang.srt.managers.controller.model_runner
import
InputMetadata
from
sglang.srt.mm_utils
import
(
from
sglang.srt.mm_utils
import
(
get_anyres_image_grid_shape
,
get_anyres_image_grid_shape
,
unpad_image
,
unpad_image
,
unpad_image_shape
,
unpad_image_shape
,
)
)
from
sglang.srt.model_executor.model_runner
import
InputMetadata
from
sglang.srt.models.llama2
import
LlamaForCausalLM
from
sglang.srt.models.llama2
import
LlamaForCausalLM
from
sglang.srt.models.mistral
import
MistralForCausalLM
from
sglang.srt.models.mistral
import
MistralForCausalLM
from
sglang.srt.models.qwen2
import
Qwen2ForCausalLM
from
sglang.srt.models.qwen2
import
Qwen2ForCausalLM
...
...
python/sglang/srt/models/llavavid.py
View file @
cdcbde5f
...
@@ -26,13 +26,13 @@ from vllm.config import CacheConfig
...
@@ -26,13 +26,13 @@ from vllm.config import CacheConfig
from
vllm.model_executor.layers.quantization.base_config
import
QuantizationConfig
from
vllm.model_executor.layers.quantization.base_config
import
QuantizationConfig
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
from
sglang.srt.managers.controller.infer_batch
import
ForwardMode
from
sglang.srt.managers.schedule_batch
import
ForwardMode
from
sglang.srt.managers.controller.model_runner
import
InputMetadata
from
sglang.srt.mm_utils
import
(
from
sglang.srt.mm_utils
import
(
get_anyres_image_grid_shape
,
get_anyres_image_grid_shape
,
unpad_image
,
unpad_image
,
unpad_image_shape
,
unpad_image_shape
,
)
)
from
sglang.srt.model_executor.model_runner
import
InputMetadata
from
sglang.srt.models.llama2
import
LlamaForCausalLM
from
sglang.srt.models.llama2
import
LlamaForCausalLM
...
...
python/sglang/srt/models/minicpm.py
View file @
cdcbde5f
...
@@ -39,7 +39,7 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
...
@@ -39,7 +39,7 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.m
anagers.controlle
r.model_runner
import
InputMetadata
from
sglang.srt.m
odel_executo
r.model_runner
import
InputMetadata
class
MiniCPMMLP
(
nn
.
Module
):
class
MiniCPMMLP
(
nn
.
Module
):
...
...
python/sglang/srt/models/mixtral.py
View file @
cdcbde5f
...
@@ -50,7 +50,7 @@ from vllm.utils import print_warning_once
...
@@ -50,7 +50,7 @@ from vllm.utils import print_warning_once
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.m
anagers.controlle
r.model_runner
import
InputMetadata
from
sglang.srt.m
odel_executo
r.model_runner
import
InputMetadata
class
MixtralMoE
(
nn
.
Module
):
class
MixtralMoE
(
nn
.
Module
):
...
...
python/sglang/srt/models/mixtral_quant.py
View file @
cdcbde5f
...
@@ -45,7 +45,7 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
...
@@ -45,7 +45,7 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.m
anagers.controlle
r.model_runner
import
InputMetadata
from
sglang.srt.m
odel_executo
r.model_runner
import
InputMetadata
class
MixtralMLP
(
nn
.
Module
):
class
MixtralMLP
(
nn
.
Module
):
...
...
python/sglang/srt/models/qwen.py
View file @
cdcbde5f
...
@@ -39,7 +39,7 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
...
@@ -39,7 +39,7 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.m
anagers.controlle
r.model_runner
import
InputMetadata
from
sglang.srt.m
odel_executo
r.model_runner
import
InputMetadata
class
QWenMLP
(
nn
.
Module
):
class
QWenMLP
(
nn
.
Module
):
...
...
python/sglang/srt/models/qwen2.py
View file @
cdcbde5f
...
@@ -39,7 +39,7 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
...
@@ -39,7 +39,7 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.m
anagers.controlle
r.model_runner
import
InputMetadata
from
sglang.srt.m
odel_executo
r.model_runner
import
InputMetadata
Qwen2Config
=
None
Qwen2Config
=
None
...
...
python/sglang/srt/models/qwen2_moe.py
View file @
cdcbde5f
...
@@ -51,7 +51,7 @@ from vllm.sequence import IntermediateTensors, SamplerOutput
...
@@ -51,7 +51,7 @@ from vllm.sequence import IntermediateTensors, SamplerOutput
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.m
anagers.controlle
r.model_runner
import
InputMetadata
from
sglang.srt.m
odel_executo
r.model_runner
import
InputMetadata
class
Qwen2MoeMLP
(
nn
.
Module
):
class
Qwen2MoeMLP
(
nn
.
Module
):
...
...
python/sglang/srt/models/stablelm.py
View file @
cdcbde5f
...
@@ -40,7 +40,7 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
...
@@ -40,7 +40,7 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.m
anagers.controlle
r.model_runner
import
InputMetadata
from
sglang.srt.m
odel_executo
r.model_runner
import
InputMetadata
class
StablelmMLP
(
nn
.
Module
):
class
StablelmMLP
(
nn
.
Module
):
...
...
python/sglang/srt/server.py
View file @
cdcbde5f
...
@@ -44,11 +44,11 @@ from fastapi.responses import JSONResponse, Response, StreamingResponse
...
@@ -44,11 +44,11 @@ from fastapi.responses import JSONResponse, Response, StreamingResponse
from
sglang.lang.backend.runtime_endpoint
import
RuntimeEndpoint
from
sglang.lang.backend.runtime_endpoint
import
RuntimeEndpoint
from
sglang.srt.constrained
import
disable_cache
from
sglang.srt.constrained
import
disable_cache
from
sglang.srt.hf_transformers_utils
import
get_tokenizer
from
sglang.srt.hf_transformers_utils
import
get_tokenizer
from
sglang.srt.managers.controller
.manager
_multi
import
(
from
sglang.srt.managers.controller_multi
import
(
start_controller_process
as
start_controller_process_multi
,
start_controller_process
as
start_controller_process_multi
,
)
)
from
sglang.srt.managers.controller
.manager
_single
import
launch_tp_servers
from
sglang.srt.managers.controller_single
import
launch_tp_servers
from
sglang.srt.managers.controller
.manager
_single
import
(
from
sglang.srt.managers.controller_single
import
(
start_controller_process
as
start_controller_process_single
,
start_controller_process
as
start_controller_process_single
,
)
)
from
sglang.srt.managers.detokenizer_manager
import
start_detokenizer_process
from
sglang.srt.managers.detokenizer_manager
import
start_detokenizer_process
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment