Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
0c9c6c75
Unverified
Commit
0c9c6c75
authored
Jun 30, 2025
by
fzyzcjy
Committed by
GitHub
Jun 29, 2025
Browse files
Move files related to EPLB (#7580)
parent
e3f9b548
Changes
22
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
38 additions
and
48 deletions
+38
-48
python/sglang/srt/eplb/__init__.py
python/sglang/srt/eplb/__init__.py
+0
-0
python/sglang/srt/eplb/eplb_algorithms/__init__.py
python/sglang/srt/eplb/eplb_algorithms/__init__.py
+1
-1
python/sglang/srt/eplb/eplb_algorithms/deepseek.py
python/sglang/srt/eplb/eplb_algorithms/deepseek.py
+0
-0
python/sglang/srt/eplb/eplb_algorithms/deepseek_vec.py
python/sglang/srt/eplb/eplb_algorithms/deepseek_vec.py
+0
-0
python/sglang/srt/eplb/eplb_manager.py
python/sglang/srt/eplb/eplb_manager.py
+2
-4
python/sglang/srt/eplb/eplb_simulator/__init__.py
python/sglang/srt/eplb/eplb_simulator/__init__.py
+0
-0
python/sglang/srt/eplb/eplb_simulator/reader.py
python/sglang/srt/eplb/eplb_simulator/reader.py
+1
-1
python/sglang/srt/eplb/expert_distribution.py
python/sglang/srt/eplb/expert_distribution.py
+1
-1
python/sglang/srt/eplb/expert_location.py
python/sglang/srt/eplb/expert_location.py
+1
-1
python/sglang/srt/eplb/expert_location_dispatch.py
python/sglang/srt/eplb/expert_location_dispatch.py
+1
-1
python/sglang/srt/eplb/expert_location_updater.py
python/sglang/srt/eplb/expert_location_updater.py
+1
-1
python/sglang/srt/layers/moe/ep_moe/layer.py
python/sglang/srt/layers/moe/ep_moe/layer.py
+2
-2
python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py
python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py
+1
-3
python/sglang/srt/layers/moe/topk.py
python/sglang/srt/layers/moe/topk.py
+3
-3
python/sglang/srt/managers/scheduler.py
python/sglang/srt/managers/scheduler.py
+1
-3
python/sglang/srt/model_executor/model_runner.py
python/sglang/srt/model_executor/model_runner.py
+13
-13
python/sglang/srt/models/deepseek_nextn.py
python/sglang/srt/models/deepseek_nextn.py
+1
-3
python/sglang/srt/models/deepseek_v2.py
python/sglang/srt/models/deepseek_v2.py
+3
-5
python/sglang/srt/models/hunyuan.py
python/sglang/srt/models/hunyuan.py
+1
-1
python/sglang/srt/models/qwen2_moe.py
python/sglang/srt/models/qwen2_moe.py
+5
-5
No files found.
python/sglang/srt/eplb/__init__.py
0 → 100644
View file @
0c9c6c75
python/sglang/srt/
managers
/eplb_algorithms/__init__.py
→
python/sglang/srt/
eplb
/eplb_algorithms/__init__.py
View file @
0c9c6c75
...
@@ -3,7 +3,7 @@ from typing import Optional
...
@@ -3,7 +3,7 @@ from typing import Optional
import
torch
import
torch
from
sglang.srt.
managers
.eplb_algorithms
import
deepseek
,
deepseek_vec
from
sglang.srt.
eplb
.eplb_algorithms
import
deepseek
,
deepseek_vec
class
EplbAlgorithm
(
Enum
):
class
EplbAlgorithm
(
Enum
):
...
...
python/sglang/srt/
managers
/eplb_algorithms/deepseek.py
→
python/sglang/srt/
eplb
/eplb_algorithms/deepseek.py
View file @
0c9c6c75
File moved
python/sglang/srt/
managers
/eplb_algorithms/deepseek_vec.py
→
python/sglang/srt/
eplb
/eplb_algorithms/deepseek_vec.py
View file @
0c9c6c75
File moved
python/sglang/srt/
managers
/eplb_manager.py
→
python/sglang/srt/
eplb
/eplb_manager.py
View file @
0c9c6c75
...
@@ -4,10 +4,8 @@ from typing import TYPE_CHECKING, List
...
@@ -4,10 +4,8 @@ from typing import TYPE_CHECKING, List
import
torch.cuda
import
torch.cuda
from
sglang.srt.managers.expert_distribution
import
(
from
sglang.srt.eplb.expert_distribution
import
get_global_expert_distribution_recorder
get_global_expert_distribution_recorder
,
from
sglang.srt.eplb.expert_location
import
ExpertLocationMetadata
)
from
sglang.srt.managers.expert_location
import
ExpertLocationMetadata
if
TYPE_CHECKING
:
if
TYPE_CHECKING
:
from
sglang.srt.model_executor.model_runner
import
ModelRunner
from
sglang.srt.model_executor.model_runner
import
ModelRunner
...
...
python/sglang/srt/eplb_simulator/__init__.py
→
python/sglang/srt/eplb
/eplb
_simulator/__init__.py
View file @
0c9c6c75
File moved
python/sglang/srt/eplb_simulator/reader.py
→
python/sglang/srt/eplb
/eplb
_simulator/reader.py
View file @
0c9c6c75
...
@@ -4,7 +4,7 @@ from pathlib import Path
...
@@ -4,7 +4,7 @@ from pathlib import Path
import
torch
import
torch
from
tqdm
import
tqdm
from
tqdm
import
tqdm
from
sglang.srt.
managers
.expert_distribution
import
(
from
sglang.srt.
eplb
.expert_distribution
import
(
_convert_global_physical_count_to_logical_count
,
_convert_global_physical_count_to_logical_count
,
)
)
...
...
python/sglang/srt/
managers
/expert_distribution.py
→
python/sglang/srt/
eplb
/expert_distribution.py
View file @
0c9c6c75
...
@@ -24,7 +24,7 @@ import einops
...
@@ -24,7 +24,7 @@ import einops
import
torch
import
torch
import
torch.distributed
import
torch.distributed
from
sglang.srt.
managers
.expert_location
import
ExpertLocationMetadata
from
sglang.srt.
eplb
.expert_location
import
ExpertLocationMetadata
from
sglang.srt.managers.schedule_batch
import
global_server_args_dict
from
sglang.srt.managers.schedule_batch
import
global_server_args_dict
from
sglang.srt.model_executor.forward_batch_info
import
ForwardBatch
from
sglang.srt.model_executor.forward_batch_info
import
ForwardBatch
from
sglang.srt.server_args
import
ServerArgs
from
sglang.srt.server_args
import
ServerArgs
...
...
python/sglang/srt/
managers
/expert_location.py
→
python/sglang/srt/
eplb
/expert_location.py
View file @
0c9c6c75
...
@@ -23,7 +23,7 @@ import torch.distributed
...
@@ -23,7 +23,7 @@ import torch.distributed
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
from
sglang.srt.configs.model_config
import
ModelConfig
from
sglang.srt.configs.model_config
import
ModelConfig
from
sglang.srt.
managers
import
eplb_algorithms
from
sglang.srt.
eplb
import
eplb_algorithms
from
sglang.srt.model_loader
import
get_model_architecture
from
sglang.srt.model_loader
import
get_model_architecture
from
sglang.srt.server_args
import
ServerArgs
from
sglang.srt.server_args
import
ServerArgs
...
...
python/sglang/srt/
managers
/expert_location_dispatch.py
→
python/sglang/srt/
eplb
/expert_location_dispatch.py
View file @
0c9c6c75
...
@@ -17,7 +17,7 @@ from typing import Literal, Optional
...
@@ -17,7 +17,7 @@ from typing import Literal, Optional
import
torch
import
torch
from
sglang.srt.
managers
.expert_location
import
get_global_expert_location_metadata
from
sglang.srt.
eplb
.expert_location
import
get_global_expert_location_metadata
from
sglang.srt.managers.schedule_batch
import
global_server_args_dict
from
sglang.srt.managers.schedule_batch
import
global_server_args_dict
...
...
python/sglang/srt/
model_executor
/expert_location_updater.py
→
python/sglang/srt/
eplb
/expert_location_updater.py
View file @
0c9c6c75
...
@@ -20,7 +20,7 @@ import torch
...
@@ -20,7 +20,7 @@ import torch
import
torch.distributed
import
torch.distributed
from
torch.distributed
import
P2POp
from
torch.distributed
import
P2POp
from
sglang.srt.
managers
.expert_location
import
(
from
sglang.srt.
eplb
.expert_location
import
(
ExpertLocationMetadata
,
ExpertLocationMetadata
,
get_global_expert_location_metadata
,
get_global_expert_location_metadata
,
)
)
...
...
python/sglang/srt/layers/moe/ep_moe/layer.py
View file @
0c9c6c75
...
@@ -11,6 +11,8 @@ from sglang.srt.distributed import (
...
@@ -11,6 +11,8 @@ from sglang.srt.distributed import (
get_tensor_model_parallel_rank
,
get_tensor_model_parallel_rank
,
get_tensor_model_parallel_world_size
,
get_tensor_model_parallel_world_size
,
)
)
from
sglang.srt.eplb.expert_location
import
get_global_expert_location_metadata
from
sglang.srt.eplb.expert_location_dispatch
import
ExpertLocationDispatchInfo
from
sglang.srt.layers.moe.ep_moe.kernels
import
(
from
sglang.srt.layers.moe.ep_moe.kernels
import
(
ep_gather
,
ep_gather
,
ep_scatter
,
ep_scatter
,
...
@@ -40,8 +42,6 @@ from sglang.srt.layers.quantization.fp8_kernel import (
...
@@ -40,8 +42,6 @@ from sglang.srt.layers.quantization.fp8_kernel import (
sglang_per_token_quant_fp8
,
sglang_per_token_quant_fp8
,
)
)
from
sglang.srt.layers.quantization.fp8_utils
import
normalize_e4m3fn_to_e4m3fnuz
from
sglang.srt.layers.quantization.fp8_utils
import
normalize_e4m3fn_to_e4m3fnuz
from
sglang.srt.managers.expert_location
import
get_global_expert_location_metadata
from
sglang.srt.managers.expert_location_dispatch
import
ExpertLocationDispatchInfo
from
sglang.srt.managers.schedule_batch
import
global_server_args_dict
from
sglang.srt.managers.schedule_batch
import
global_server_args_dict
from
sglang.srt.model_executor.forward_batch_info
import
ForwardMode
from
sglang.srt.model_executor.forward_batch_info
import
ForwardMode
from
sglang.srt.utils
import
(
from
sglang.srt.utils
import
(
...
...
python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py
View file @
0c9c6c75
import
logging
import
logging
from
dataclasses
import
dataclass
from
dataclasses
import
dataclass
from
sglang.srt.eplb.expert_distribution
import
get_global_expert_distribution_recorder
from
sglang.srt.layers.quantization
import
deep_gemm_wrapper
from
sglang.srt.layers.quantization
import
deep_gemm_wrapper
from
sglang.srt.managers.expert_distribution
import
(
get_global_expert_distribution_recorder
,
)
from
sglang.srt.managers.schedule_batch
import
global_server_args_dict
from
sglang.srt.managers.schedule_batch
import
global_server_args_dict
from
sglang.srt.utils
import
(
from
sglang.srt.utils
import
(
DeepEPMode
,
DeepEPMode
,
...
...
python/sglang/srt/layers/moe/topk.py
View file @
0c9c6c75
...
@@ -18,12 +18,12 @@ from typing import Callable, Optional
...
@@ -18,12 +18,12 @@ from typing import Callable, Optional
import
torch
import
torch
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
from
sglang.srt.
managers
import
expert_location_dispatch
from
sglang.srt.
eplb
import
expert_location_dispatch
from
sglang.srt.
managers
.expert_distribution
import
(
from
sglang.srt.
eplb
.expert_distribution
import
(
ExpertDistributionRecorder
,
ExpertDistributionRecorder
,
get_global_expert_distribution_recorder
,
get_global_expert_distribution_recorder
,
)
)
from
sglang.srt.
managers
.expert_location_dispatch
import
(
from
sglang.srt.
eplb
.expert_location_dispatch
import
(
ExpertLocationDispatchInfo
,
ExpertLocationDispatchInfo
,
topk_ids_logical_to_physical
,
topk_ids_logical_to_physical
,
)
)
...
...
python/sglang/srt/managers/scheduler.py
View file @
0c9c6c75
...
@@ -58,6 +58,7 @@ from sglang.srt.disaggregation.utils import (
...
@@ -58,6 +58,7 @@ from sglang.srt.disaggregation.utils import (
prepare_abort
,
prepare_abort
,
)
)
from
sglang.srt.distributed
import
get_pp_group
,
get_world_group
from
sglang.srt.distributed
import
get_pp_group
,
get_world_group
from
sglang.srt.eplb.expert_distribution
import
get_global_expert_distribution_recorder
from
sglang.srt.hf_transformers_utils
import
(
from
sglang.srt.hf_transformers_utils
import
(
get_processor
,
get_processor
,
get_tokenizer
,
get_tokenizer
,
...
@@ -65,9 +66,6 @@ from sglang.srt.hf_transformers_utils import (
...
@@ -65,9 +66,6 @@ from sglang.srt.hf_transformers_utils import (
)
)
from
sglang.srt.layers.dp_attention
import
compute_dp_attention_world_info
from
sglang.srt.layers.dp_attention
import
compute_dp_attention_world_info
from
sglang.srt.layers.logits_processor
import
LogitsProcessorOutput
from
sglang.srt.layers.logits_processor
import
LogitsProcessorOutput
from
sglang.srt.managers.expert_distribution
import
(
get_global_expert_distribution_recorder
,
)
from
sglang.srt.managers.io_struct
import
(
from
sglang.srt.managers.io_struct
import
(
AbortReq
,
AbortReq
,
CloseSessionReqInput
,
CloseSessionReqInput
,
...
...
python/sglang/srt/model_executor/model_runner.py
View file @
0c9c6c75
...
@@ -39,6 +39,19 @@ from sglang.srt.distributed import (
...
@@ -39,6 +39,19 @@ from sglang.srt.distributed import (
set_mscclpp_all_reduce
,
set_mscclpp_all_reduce
,
)
)
from
sglang.srt.distributed.parallel_state
import
monkey_patch_vllm_parallel_state
from
sglang.srt.distributed.parallel_state
import
monkey_patch_vllm_parallel_state
from
sglang.srt.eplb.eplb_manager
import
EPLBManager
from
sglang.srt.eplb.expert_distribution
import
(
ExpertDistributionRecorder
,
get_global_expert_distribution_recorder
,
set_global_expert_distribution_recorder
,
)
from
sglang.srt.eplb.expert_location
import
(
ExpertLocationMetadata
,
compute_initial_expert_location_metadata
,
get_global_expert_location_metadata
,
set_global_expert_location_metadata
,
)
from
sglang.srt.eplb.expert_location_updater
import
ExpertLocationUpdater
from
sglang.srt.layers.attention.tbo_backend
import
TboAttnBackend
from
sglang.srt.layers.attention.tbo_backend
import
TboAttnBackend
from
sglang.srt.layers.dp_attention
import
(
from
sglang.srt.layers.dp_attention
import
(
get_attention_tp_group
,
get_attention_tp_group
,
...
@@ -54,18 +67,6 @@ from sglang.srt.layers.sampler import Sampler
...
@@ -54,18 +67,6 @@ from sglang.srt.layers.sampler import Sampler
from
sglang.srt.layers.torchao_utils
import
apply_torchao_config_to_model
from
sglang.srt.layers.torchao_utils
import
apply_torchao_config_to_model
from
sglang.srt.layers.utils
import
is_sm100_supported
from
sglang.srt.layers.utils
import
is_sm100_supported
from
sglang.srt.lora.lora_manager
import
LoRAManager
from
sglang.srt.lora.lora_manager
import
LoRAManager
from
sglang.srt.managers.eplb_manager
import
EPLBManager
from
sglang.srt.managers.expert_distribution
import
(
ExpertDistributionRecorder
,
get_global_expert_distribution_recorder
,
set_global_expert_distribution_recorder
,
)
from
sglang.srt.managers.expert_location
import
(
ExpertLocationMetadata
,
compute_initial_expert_location_metadata
,
get_global_expert_location_metadata
,
set_global_expert_location_metadata
,
)
from
sglang.srt.managers.schedule_batch
import
(
from
sglang.srt.managers.schedule_batch
import
(
GLOBAL_SERVER_ARGS_KEYS
,
GLOBAL_SERVER_ARGS_KEYS
,
global_server_args_dict
,
global_server_args_dict
,
...
@@ -84,7 +85,6 @@ from sglang.srt.mem_cache.memory_pool import (
...
@@ -84,7 +85,6 @@ from sglang.srt.mem_cache.memory_pool import (
SWAKVPool
,
SWAKVPool
,
)
)
from
sglang.srt.model_executor.cuda_graph_runner
import
CudaGraphRunner
from
sglang.srt.model_executor.cuda_graph_runner
import
CudaGraphRunner
from
sglang.srt.model_executor.expert_location_updater
import
ExpertLocationUpdater
from
sglang.srt.model_executor.forward_batch_info
import
ForwardBatch
,
PPProxyTensors
from
sglang.srt.model_executor.forward_batch_info
import
ForwardBatch
,
PPProxyTensors
from
sglang.srt.model_loader
import
get_model
from
sglang.srt.model_loader
import
get_model
from
sglang.srt.model_loader.loader
import
DefaultModelLoader
,
get_model_loader
from
sglang.srt.model_loader.loader
import
DefaultModelLoader
,
get_model_loader
...
...
python/sglang/srt/models/deepseek_nextn.py
View file @
0c9c6c75
...
@@ -21,6 +21,7 @@ from torch import nn
...
@@ -21,6 +21,7 @@ from torch import nn
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
from
sglang.srt.distributed
import
get_tensor_model_parallel_world_size
from
sglang.srt.distributed
import
get_tensor_model_parallel_world_size
from
sglang.srt.eplb.expert_distribution
import
get_global_expert_distribution_recorder
from
sglang.srt.layers.layernorm
import
RMSNorm
from
sglang.srt.layers.layernorm
import
RMSNorm
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.quantization.base_config
import
QuantizationConfig
from
sglang.srt.layers.quantization.base_config
import
QuantizationConfig
...
@@ -28,9 +29,6 @@ from sglang.srt.layers.vocab_parallel_embedding import (
...
@@ -28,9 +29,6 @@ from sglang.srt.layers.vocab_parallel_embedding import (
ParallelLMHead
,
ParallelLMHead
,
VocabParallelEmbedding
,
VocabParallelEmbedding
,
)
)
from
sglang.srt.managers.expert_distribution
import
(
get_global_expert_distribution_recorder
,
)
from
sglang.srt.managers.schedule_batch
import
global_server_args_dict
from
sglang.srt.managers.schedule_batch
import
global_server_args_dict
from
sglang.srt.model_executor.forward_batch_info
import
ForwardBatch
from
sglang.srt.model_executor.forward_batch_info
import
ForwardBatch
from
sglang.srt.models.deepseek_v2
import
DeepseekV2DecoderLayer
,
DeepseekV3ForCausalLM
from
sglang.srt.models.deepseek_v2
import
DeepseekV2DecoderLayer
,
DeepseekV3ForCausalLM
...
...
python/sglang/srt/models/deepseek_v2.py
View file @
0c9c6c75
...
@@ -32,6 +32,9 @@ from sglang.srt.distributed import (
...
@@ -32,6 +32,9 @@ from sglang.srt.distributed import (
parallel_state
,
parallel_state
,
tensor_model_parallel_all_reduce
,
tensor_model_parallel_all_reduce
,
)
)
from
sglang.srt.eplb.expert_distribution
import
get_global_expert_distribution_recorder
from
sglang.srt.eplb.expert_location
import
ModelConfigForExpertLocation
from
sglang.srt.eplb.expert_location_dispatch
import
ExpertLocationDispatchInfo
from
sglang.srt.layers.activation
import
SiluAndMul
from
sglang.srt.layers.activation
import
SiluAndMul
from
sglang.srt.layers.communicator
import
(
from
sglang.srt.layers.communicator
import
(
LayerCommunicator
,
LayerCommunicator
,
...
@@ -77,11 +80,6 @@ from sglang.srt.layers.vocab_parallel_embedding import (
...
@@ -77,11 +80,6 @@ from sglang.srt.layers.vocab_parallel_embedding import (
ParallelLMHead
,
ParallelLMHead
,
VocabParallelEmbedding
,
VocabParallelEmbedding
,
)
)
from
sglang.srt.managers.expert_distribution
import
(
get_global_expert_distribution_recorder
,
)
from
sglang.srt.managers.expert_location
import
ModelConfigForExpertLocation
from
sglang.srt.managers.expert_location_dispatch
import
ExpertLocationDispatchInfo
from
sglang.srt.managers.schedule_batch
import
global_server_args_dict
from
sglang.srt.managers.schedule_batch
import
global_server_args_dict
from
sglang.srt.model_executor.forward_batch_info
import
ForwardBatch
from
sglang.srt.model_executor.forward_batch_info
import
ForwardBatch
from
sglang.srt.model_loader.weight_utils
import
default_weight_loader
from
sglang.srt.model_loader.weight_utils
import
default_weight_loader
...
...
python/sglang/srt/models/hunyuan.py
View file @
0c9c6c75
...
@@ -28,6 +28,7 @@ from sglang.srt.distributed import (
...
@@ -28,6 +28,7 @@ from sglang.srt.distributed import (
get_tensor_model_parallel_world_size
,
get_tensor_model_parallel_world_size
,
tensor_model_parallel_all_reduce
,
tensor_model_parallel_all_reduce
,
)
)
from
sglang.srt.eplb.expert_distribution
import
ExpertDistributionRecorder
from
sglang.srt.layers.activation
import
SiluAndMul
from
sglang.srt.layers.activation
import
SiluAndMul
from
sglang.srt.layers.layernorm
import
RMSNorm
from
sglang.srt.layers.layernorm
import
RMSNorm
from
sglang.srt.layers.linear
import
(
from
sglang.srt.layers.linear
import
(
...
@@ -48,7 +49,6 @@ from sglang.srt.layers.vocab_parallel_embedding import (
...
@@ -48,7 +49,6 @@ from sglang.srt.layers.vocab_parallel_embedding import (
ParallelLMHead
,
ParallelLMHead
,
VocabParallelEmbedding
,
VocabParallelEmbedding
,
)
)
from
sglang.srt.managers.expert_distribution
import
ExpertDistributionRecorder
from
sglang.srt.model_executor.forward_batch_info
import
ForwardBatch
from
sglang.srt.model_executor.forward_batch_info
import
ForwardBatch
from
sglang.srt.model_loader.weight_utils
import
(
from
sglang.srt.model_loader.weight_utils
import
(
default_weight_loader
,
default_weight_loader
,
...
...
python/sglang/srt/models/qwen2_moe.py
View file @
0c9c6c75
...
@@ -31,6 +31,11 @@ from sglang.srt.distributed import (
...
@@ -31,6 +31,11 @@ from sglang.srt.distributed import (
get_tensor_model_parallel_world_size
,
get_tensor_model_parallel_world_size
,
tensor_model_parallel_all_reduce
,
tensor_model_parallel_all_reduce
,
)
)
from
sglang.srt.eplb.expert_distribution
import
(
ExpertDistributionRecorder
,
get_global_expert_distribution_recorder
,
)
from
sglang.srt.eplb.expert_location
import
ModelConfigForExpertLocation
from
sglang.srt.layers.activation
import
SiluAndMul
from
sglang.srt.layers.activation
import
SiluAndMul
from
sglang.srt.layers.communicator
import
(
from
sglang.srt.layers.communicator
import
(
LayerCommunicator
,
LayerCommunicator
,
...
@@ -64,11 +69,6 @@ from sglang.srt.layers.vocab_parallel_embedding import (
...
@@ -64,11 +69,6 @@ from sglang.srt.layers.vocab_parallel_embedding import (
ParallelLMHead
,
ParallelLMHead
,
VocabParallelEmbedding
,
VocabParallelEmbedding
,
)
)
from
sglang.srt.managers.expert_distribution
import
(
ExpertDistributionRecorder
,
get_global_expert_distribution_recorder
,
)
from
sglang.srt.managers.expert_location
import
ModelConfigForExpertLocation
from
sglang.srt.managers.schedule_batch
import
global_server_args_dict
from
sglang.srt.managers.schedule_batch
import
global_server_args_dict
from
sglang.srt.model_executor.forward_batch_info
import
ForwardBatch
,
PPProxyTensors
from
sglang.srt.model_executor.forward_batch_info
import
ForwardBatch
,
PPProxyTensors
from
sglang.srt.model_loader.weight_utils
import
default_weight_loader
from
sglang.srt.model_loader.weight_utils
import
default_weight_loader
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment