Unverified commit 9f2c9568, authored by Xiaoyu Zhang and committed by GitHub
Browse files

[doc] add a note for --n-share-experts-fusion args (#6154)

parent 3f2702ae
...@@ -1194,7 +1194,7 @@ class ServerArgs: ...@@ -1194,7 +1194,7 @@ class ServerArgs:
type=int, type=int,
default=0, default=0,
help="The number of shared_experts need to be replicated to fuse with normal experts in deepseek v3/r1, " help="The number of shared_experts need to be replicated to fuse with normal experts in deepseek v3/r1, "
"set it to tp_size can get best optimized performance.", "set it to tp_size can get best optimized performance. Note that for architectures with SM==90, we have enabled the shared experts fusion optimization by default for DeepSeek V3/R1, with n_share_experts_fusion automatically set to the TP size.",
) )
parser.add_argument( parser.add_argument(
"--disable-chunked-prefix-cache", "--disable-chunked-prefix-cache",
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment