Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
0c9c6c75
"docs/vscode:/vscode.git/clone" did not exist on "b42d61002091b7387078943be1a9594125787d10"
Unverified
Commit
0c9c6c75
authored
Jun 30, 2025
by
fzyzcjy
Committed by
GitHub
Jun 29, 2025
Browse files
Move files related to EPLB (#7580)
parent
e3f9b548
Changes
22
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
38 additions
and
48 deletions
+38
-48
python/sglang/srt/eplb/__init__.py
python/sglang/srt/eplb/__init__.py
+0
-0
python/sglang/srt/eplb/eplb_algorithms/__init__.py
python/sglang/srt/eplb/eplb_algorithms/__init__.py
+1
-1
python/sglang/srt/eplb/eplb_algorithms/deepseek.py
python/sglang/srt/eplb/eplb_algorithms/deepseek.py
+0
-0
python/sglang/srt/eplb/eplb_algorithms/deepseek_vec.py
python/sglang/srt/eplb/eplb_algorithms/deepseek_vec.py
+0
-0
python/sglang/srt/eplb/eplb_manager.py
python/sglang/srt/eplb/eplb_manager.py
+2
-4
python/sglang/srt/eplb/eplb_simulator/__init__.py
python/sglang/srt/eplb/eplb_simulator/__init__.py
+0
-0
python/sglang/srt/eplb/eplb_simulator/reader.py
python/sglang/srt/eplb/eplb_simulator/reader.py
+1
-1
python/sglang/srt/eplb/expert_distribution.py
python/sglang/srt/eplb/expert_distribution.py
+1
-1
python/sglang/srt/eplb/expert_location.py
python/sglang/srt/eplb/expert_location.py
+1
-1
python/sglang/srt/eplb/expert_location_dispatch.py
python/sglang/srt/eplb/expert_location_dispatch.py
+1
-1
python/sglang/srt/eplb/expert_location_updater.py
python/sglang/srt/eplb/expert_location_updater.py
+1
-1
python/sglang/srt/layers/moe/ep_moe/layer.py
python/sglang/srt/layers/moe/ep_moe/layer.py
+2
-2
python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py
python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py
+1
-3
python/sglang/srt/layers/moe/topk.py
python/sglang/srt/layers/moe/topk.py
+3
-3
python/sglang/srt/managers/scheduler.py
python/sglang/srt/managers/scheduler.py
+1
-3
python/sglang/srt/model_executor/model_runner.py
python/sglang/srt/model_executor/model_runner.py
+13
-13
python/sglang/srt/models/deepseek_nextn.py
python/sglang/srt/models/deepseek_nextn.py
+1
-3
python/sglang/srt/models/deepseek_v2.py
python/sglang/srt/models/deepseek_v2.py
+3
-5
python/sglang/srt/models/hunyuan.py
python/sglang/srt/models/hunyuan.py
+1
-1
python/sglang/srt/models/qwen2_moe.py
python/sglang/srt/models/qwen2_moe.py
+5
-5
No files found.
python/sglang/srt/eplb/__init__.py
0 → 100644
View file @
0c9c6c75
python/sglang/srt/
managers
/eplb_algorithms/__init__.py
→
python/sglang/srt/
eplb
/eplb_algorithms/__init__.py
View file @
0c9c6c75
...
...
@@ -3,7 +3,7 @@ from typing import Optional
import
torch
from
sglang.srt.
managers
.eplb_algorithms
import
deepseek
,
deepseek_vec
from
sglang.srt.
eplb
.eplb_algorithms
import
deepseek
,
deepseek_vec
class
EplbAlgorithm
(
Enum
):
...
...
python/sglang/srt/
managers
/eplb_algorithms/deepseek.py
→
python/sglang/srt/
eplb
/eplb_algorithms/deepseek.py
View file @
0c9c6c75
File moved
python/sglang/srt/
managers
/eplb_algorithms/deepseek_vec.py
→
python/sglang/srt/
eplb
/eplb_algorithms/deepseek_vec.py
View file @
0c9c6c75
File moved
python/sglang/srt/
managers
/eplb_manager.py
→
python/sglang/srt/
eplb
/eplb_manager.py
View file @
0c9c6c75
...
...
@@ -4,10 +4,8 @@ from typing import TYPE_CHECKING, List
import
torch.cuda
from
sglang.srt.managers.expert_distribution
import
(
get_global_expert_distribution_recorder
,
)
from
sglang.srt.managers.expert_location
import
ExpertLocationMetadata
from
sglang.srt.eplb.expert_distribution
import
get_global_expert_distribution_recorder
from
sglang.srt.eplb.expert_location
import
ExpertLocationMetadata
if
TYPE_CHECKING
:
from
sglang.srt.model_executor.model_runner
import
ModelRunner
...
...
python/sglang/srt/eplb_simulator/__init__.py
→
python/sglang/srt/eplb
/eplb
_simulator/__init__.py
View file @
0c9c6c75
File moved
python/sglang/srt/eplb_simulator/reader.py
→
python/sglang/srt/eplb
/eplb
_simulator/reader.py
View file @
0c9c6c75
...
...
@@ -4,7 +4,7 @@ from pathlib import Path
import
torch
from
tqdm
import
tqdm
from
sglang.srt.
managers
.expert_distribution
import
(
from
sglang.srt.
eplb
.expert_distribution
import
(
_convert_global_physical_count_to_logical_count
,
)
...
...
python/sglang/srt/
managers
/expert_distribution.py
→
python/sglang/srt/
eplb
/expert_distribution.py
View file @
0c9c6c75
...
...
@@ -24,7 +24,7 @@ import einops
import
torch
import
torch.distributed
from
sglang.srt.
managers
.expert_location
import
ExpertLocationMetadata
from
sglang.srt.
eplb
.expert_location
import
ExpertLocationMetadata
from
sglang.srt.managers.schedule_batch
import
global_server_args_dict
from
sglang.srt.model_executor.forward_batch_info
import
ForwardBatch
from
sglang.srt.server_args
import
ServerArgs
...
...
python/sglang/srt/
managers
/expert_location.py
→
python/sglang/srt/
eplb
/expert_location.py
View file @
0c9c6c75
...
...
@@ -23,7 +23,7 @@ import torch.distributed
import
torch.nn.functional
as
F
from
sglang.srt.configs.model_config
import
ModelConfig
from
sglang.srt.
managers
import
eplb_algorithms
from
sglang.srt.
eplb
import
eplb_algorithms
from
sglang.srt.model_loader
import
get_model_architecture
from
sglang.srt.server_args
import
ServerArgs
...
...
python/sglang/srt/
managers
/expert_location_dispatch.py
→
python/sglang/srt/
eplb
/expert_location_dispatch.py
View file @
0c9c6c75
...
...
@@ -17,7 +17,7 @@ from typing import Literal, Optional
import
torch
from
sglang.srt.
managers
.expert_location
import
get_global_expert_location_metadata
from
sglang.srt.
eplb
.expert_location
import
get_global_expert_location_metadata
from
sglang.srt.managers.schedule_batch
import
global_server_args_dict
...
...
python/sglang/srt/
model_executor
/expert_location_updater.py
→
python/sglang/srt/
eplb
/expert_location_updater.py
View file @
0c9c6c75
...
...
@@ -20,7 +20,7 @@ import torch
import
torch.distributed
from
torch.distributed
import
P2POp
from
sglang.srt.
managers
.expert_location
import
(
from
sglang.srt.
eplb
.expert_location
import
(
ExpertLocationMetadata
,
get_global_expert_location_metadata
,
)
...
...
python/sglang/srt/layers/moe/ep_moe/layer.py
View file @
0c9c6c75
...
...
@@ -11,6 +11,8 @@ from sglang.srt.distributed import (
get_tensor_model_parallel_rank
,
get_tensor_model_parallel_world_size
,
)
from
sglang.srt.eplb.expert_location
import
get_global_expert_location_metadata
from
sglang.srt.eplb.expert_location_dispatch
import
ExpertLocationDispatchInfo
from
sglang.srt.layers.moe.ep_moe.kernels
import
(
ep_gather
,
ep_scatter
,
...
...
@@ -40,8 +42,6 @@ from sglang.srt.layers.quantization.fp8_kernel import (
sglang_per_token_quant_fp8
,
)
from
sglang.srt.layers.quantization.fp8_utils
import
normalize_e4m3fn_to_e4m3fnuz
from
sglang.srt.managers.expert_location
import
get_global_expert_location_metadata
from
sglang.srt.managers.expert_location_dispatch
import
ExpertLocationDispatchInfo
from
sglang.srt.managers.schedule_batch
import
global_server_args_dict
from
sglang.srt.model_executor.forward_batch_info
import
ForwardMode
from
sglang.srt.utils
import
(
...
...
python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py
View file @
0c9c6c75
import
logging
from
dataclasses
import
dataclass
from
sglang.srt.eplb.expert_distribution
import
get_global_expert_distribution_recorder
from
sglang.srt.layers.quantization
import
deep_gemm_wrapper
from
sglang.srt.managers.expert_distribution
import
(
get_global_expert_distribution_recorder
,
)
from
sglang.srt.managers.schedule_batch
import
global_server_args_dict
from
sglang.srt.utils
import
(
DeepEPMode
,
...
...
python/sglang/srt/layers/moe/topk.py
View file @
0c9c6c75
...
...
@@ -18,12 +18,12 @@ from typing import Callable, Optional
import
torch
import
torch.nn.functional
as
F
from
sglang.srt.
managers
import
expert_location_dispatch
from
sglang.srt.
managers
.expert_distribution
import
(
from
sglang.srt.
eplb
import
expert_location_dispatch
from
sglang.srt.
eplb
.expert_distribution
import
(
ExpertDistributionRecorder
,
get_global_expert_distribution_recorder
,
)
from
sglang.srt.
managers
.expert_location_dispatch
import
(
from
sglang.srt.
eplb
.expert_location_dispatch
import
(
ExpertLocationDispatchInfo
,
topk_ids_logical_to_physical
,
)
...
...
python/sglang/srt/managers/scheduler.py
View file @
0c9c6c75
...
...
@@ -58,6 +58,7 @@ from sglang.srt.disaggregation.utils import (
prepare_abort
,
)
from
sglang.srt.distributed
import
get_pp_group
,
get_world_group
from
sglang.srt.eplb.expert_distribution
import
get_global_expert_distribution_recorder
from
sglang.srt.hf_transformers_utils
import
(
get_processor
,
get_tokenizer
,
...
...
@@ -65,9 +66,6 @@ from sglang.srt.hf_transformers_utils import (
)
from
sglang.srt.layers.dp_attention
import
compute_dp_attention_world_info
from
sglang.srt.layers.logits_processor
import
LogitsProcessorOutput
from
sglang.srt.managers.expert_distribution
import
(
get_global_expert_distribution_recorder
,
)
from
sglang.srt.managers.io_struct
import
(
AbortReq
,
CloseSessionReqInput
,
...
...
python/sglang/srt/model_executor/model_runner.py
View file @
0c9c6c75
...
...
@@ -39,6 +39,19 @@ from sglang.srt.distributed import (
set_mscclpp_all_reduce
,
)
from
sglang.srt.distributed.parallel_state
import
monkey_patch_vllm_parallel_state
from
sglang.srt.eplb.eplb_manager
import
EPLBManager
from
sglang.srt.eplb.expert_distribution
import
(
ExpertDistributionRecorder
,
get_global_expert_distribution_recorder
,
set_global_expert_distribution_recorder
,
)
from
sglang.srt.eplb.expert_location
import
(
ExpertLocationMetadata
,
compute_initial_expert_location_metadata
,
get_global_expert_location_metadata
,
set_global_expert_location_metadata
,
)
from
sglang.srt.eplb.expert_location_updater
import
ExpertLocationUpdater
from
sglang.srt.layers.attention.tbo_backend
import
TboAttnBackend
from
sglang.srt.layers.dp_attention
import
(
get_attention_tp_group
,
...
...
@@ -54,18 +67,6 @@ from sglang.srt.layers.sampler import Sampler
from
sglang.srt.layers.torchao_utils
import
apply_torchao_config_to_model
from
sglang.srt.layers.utils
import
is_sm100_supported
from
sglang.srt.lora.lora_manager
import
LoRAManager
from
sglang.srt.managers.eplb_manager
import
EPLBManager
from
sglang.srt.managers.expert_distribution
import
(
ExpertDistributionRecorder
,
get_global_expert_distribution_recorder
,
set_global_expert_distribution_recorder
,
)
from
sglang.srt.managers.expert_location
import
(
ExpertLocationMetadata
,
compute_initial_expert_location_metadata
,
get_global_expert_location_metadata
,
set_global_expert_location_metadata
,
)
from
sglang.srt.managers.schedule_batch
import
(
GLOBAL_SERVER_ARGS_KEYS
,
global_server_args_dict
,
...
...
@@ -84,7 +85,6 @@ from sglang.srt.mem_cache.memory_pool import (
SWAKVPool
,
)
from
sglang.srt.model_executor.cuda_graph_runner
import
CudaGraphRunner
from
sglang.srt.model_executor.expert_location_updater
import
ExpertLocationUpdater
from
sglang.srt.model_executor.forward_batch_info
import
ForwardBatch
,
PPProxyTensors
from
sglang.srt.model_loader
import
get_model
from
sglang.srt.model_loader.loader
import
DefaultModelLoader
,
get_model_loader
...
...
python/sglang/srt/models/deepseek_nextn.py
View file @
0c9c6c75
...
...
@@ -21,6 +21,7 @@ from torch import nn
from
transformers
import
PretrainedConfig
from
sglang.srt.distributed
import
get_tensor_model_parallel_world_size
from
sglang.srt.eplb.expert_distribution
import
get_global_expert_distribution_recorder
from
sglang.srt.layers.layernorm
import
RMSNorm
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.quantization.base_config
import
QuantizationConfig
...
...
@@ -28,9 +29,6 @@ from sglang.srt.layers.vocab_parallel_embedding import (
ParallelLMHead
,
VocabParallelEmbedding
,
)
from
sglang.srt.managers.expert_distribution
import
(
get_global_expert_distribution_recorder
,
)
from
sglang.srt.managers.schedule_batch
import
global_server_args_dict
from
sglang.srt.model_executor.forward_batch_info
import
ForwardBatch
from
sglang.srt.models.deepseek_v2
import
DeepseekV2DecoderLayer
,
DeepseekV3ForCausalLM
...
...
python/sglang/srt/models/deepseek_v2.py
View file @
0c9c6c75
...
...
@@ -32,6 +32,9 @@ from sglang.srt.distributed import (
parallel_state
,
tensor_model_parallel_all_reduce
,
)
from
sglang.srt.eplb.expert_distribution
import
get_global_expert_distribution_recorder
from
sglang.srt.eplb.expert_location
import
ModelConfigForExpertLocation
from
sglang.srt.eplb.expert_location_dispatch
import
ExpertLocationDispatchInfo
from
sglang.srt.layers.activation
import
SiluAndMul
from
sglang.srt.layers.communicator
import
(
LayerCommunicator
,
...
...
@@ -77,11 +80,6 @@ from sglang.srt.layers.vocab_parallel_embedding import (
ParallelLMHead
,
VocabParallelEmbedding
,
)
from
sglang.srt.managers.expert_distribution
import
(
get_global_expert_distribution_recorder
,
)
from
sglang.srt.managers.expert_location
import
ModelConfigForExpertLocation
from
sglang.srt.managers.expert_location_dispatch
import
ExpertLocationDispatchInfo
from
sglang.srt.managers.schedule_batch
import
global_server_args_dict
from
sglang.srt.model_executor.forward_batch_info
import
ForwardBatch
from
sglang.srt.model_loader.weight_utils
import
default_weight_loader
...
...
python/sglang/srt/models/hunyuan.py
View file @
0c9c6c75
...
...
@@ -28,6 +28,7 @@ from sglang.srt.distributed import (
get_tensor_model_parallel_world_size
,
tensor_model_parallel_all_reduce
,
)
from
sglang.srt.eplb.expert_distribution
import
ExpertDistributionRecorder
from
sglang.srt.layers.activation
import
SiluAndMul
from
sglang.srt.layers.layernorm
import
RMSNorm
from
sglang.srt.layers.linear
import
(
...
...
@@ -48,7 +49,6 @@ from sglang.srt.layers.vocab_parallel_embedding import (
ParallelLMHead
,
VocabParallelEmbedding
,
)
from
sglang.srt.managers.expert_distribution
import
ExpertDistributionRecorder
from
sglang.srt.model_executor.forward_batch_info
import
ForwardBatch
from
sglang.srt.model_loader.weight_utils
import
(
default_weight_loader
,
...
...
python/sglang/srt/models/qwen2_moe.py
View file @
0c9c6c75
...
...
@@ -31,6 +31,11 @@ from sglang.srt.distributed import (
get_tensor_model_parallel_world_size
,
tensor_model_parallel_all_reduce
,
)
from
sglang.srt.eplb.expert_distribution
import
(
ExpertDistributionRecorder
,
get_global_expert_distribution_recorder
,
)
from
sglang.srt.eplb.expert_location
import
ModelConfigForExpertLocation
from
sglang.srt.layers.activation
import
SiluAndMul
from
sglang.srt.layers.communicator
import
(
LayerCommunicator
,
...
...
@@ -64,11 +69,6 @@ from sglang.srt.layers.vocab_parallel_embedding import (
ParallelLMHead
,
VocabParallelEmbedding
,
)
from
sglang.srt.managers.expert_distribution
import
(
ExpertDistributionRecorder
,
get_global_expert_distribution_recorder
,
)
from
sglang.srt.managers.expert_location
import
ModelConfigForExpertLocation
from
sglang.srt.managers.schedule_batch
import
global_server_args_dict
from
sglang.srt.model_executor.forward_batch_info
import
ForwardBatch
,
PPProxyTensors
from
sglang.srt.model_loader.weight_utils
import
default_weight_loader
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment