Unverified Commit 4c54f442 authored by Cheng Wan, committed by GitHub

[deepep] fix: shared experts are not initialized when shared experts fusion is enabled (#5072)

parent 924ca7c9
@@ -183,7 +183,7 @@ class ServerArgs:
     enable_flashmla: bool = False
     flashinfer_mla_disable_ragged: bool = False
     warmups: Optional[str] = None
-    n_share_experts_fusion: Optional[int] = None
+    n_share_experts_fusion: int = 0
     disable_shared_experts_fusion: bool = False

     # Debug tensor dumps
@@ -1110,7 +1110,7 @@ class ServerArgs:
         parser.add_argument(
             "--n-share-experts-fusion",
             type=int,
-            default=None,
+            default=0,
             help="The number of shared_experts need to be replica to fuse with normal experts in deepseek v3/r1 "
             "we use tp_size by default.",
         )
...
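For context on why the default changed from `None` to `0`, here is a minimal, generic Python sketch (not the actual sglang code; `should_fuse` is a hypothetical name): with an `Optional[int] = None` default, any downstream numeric guard on the flag raises a `TypeError`, while a truthiness check conflates `None` with `0`, so an initialization path gated on this value can be skipped or crash.

```python
from typing import Optional


def should_fuse(n_share_experts_fusion: Optional[int]) -> bool:
    # Hypothetical guard, for illustration only: downstream code that
    # treats the flag as a replica count needs a numeric comparison.
    return n_share_experts_fusion > 0


print(should_fuse(0))  # False: fusion disabled, shared experts stay standalone
print(should_fuse(2))  # True: shared experts are replicated into the fused experts

try:
    should_fuse(None)  # the old default value
except TypeError as exc:
    # TypeError: '>' not supported between instances of 'NoneType' and 'int'
    print(f"None default breaks the guard: {exc}")
```

A plain `int` defaulting to `0` makes "fusion disabled" unambiguous, and the tp_size fallback mentioned in the help text can then be resolved explicitly where the value is consumed.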