"examples/distributed/graphsage/node_classification.py" did not exist on "4889c5782290f1990c924fbea14ba904a3248231"
Unverified commit fefa19fe, authored by miter, committed by GitHub
Browse files

Update cmdline --enable-dp-attention help string for Qwen 2/3 MoE models. (#6524)


Signed-off-by: miter <miterv@outlook.com>
parent 9c574585
@@ -1138,7 +1138,7 @@ class ServerArgs:
 parser.add_argument(
 "--enable-dp-attention",
 action="store_true",
-help="Enabling data parallelism for attention and tensor parallelism for FFN. The dp size should be equal to the tp size. Currently only DeepSeek-V2 is supported.",
+help="Enabling data parallelism for attention and tensor parallelism for FFN. The dp size should be equal to the tp size. Currently DeepSeek-V2 and Qwen 2/3 MoE models are supported.",
 )
 parser.add_argument(
 "--enable-dp-lm-head",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment