"git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "1fa5639438820b9288bd063a07ebf9e29a015b70"
Unverified commit 64f296f8, authored by Lianmin Zheng and committed by GitHub
Browse files

[Minor] Improve the style of server args (#10328)

parent 956d805d
...@@ -869,12 +869,6 @@ class ServerArgs: ...@@ -869,12 +869,6 @@ class ServerArgs:
default=ServerArgs.tokenizer_path, default=ServerArgs.tokenizer_path,
help="The path of the tokenizer.", help="The path of the tokenizer.",
) )
parser.add_argument(
"--tokenizer-worker-num",
type=int,
default=ServerArgs.tokenizer_worker_num,
help="The worker num of the tokenizer manager.",
)
parser.add_argument( parser.add_argument(
"--tokenizer-mode", "--tokenizer-mode",
type=str, type=str,
...@@ -884,6 +878,12 @@ class ServerArgs: ...@@ -884,6 +878,12 @@ class ServerArgs:
"tokenizer if available, and 'slow' will " "tokenizer if available, and 'slow' will "
"always use the slow tokenizer.", "always use the slow tokenizer.",
) )
parser.add_argument(
"--tokenizer-worker-num",
type=int,
default=ServerArgs.tokenizer_worker_num,
help="The worker num of the tokenizer manager.",
)
parser.add_argument( parser.add_argument(
"--skip-tokenizer-init", "--skip-tokenizer-init",
action="store_true", action="store_true",
...@@ -1721,20 +1721,22 @@ class ServerArgs: ...@@ -1721,20 +1721,22 @@ class ServerArgs:
default=ServerArgs.moe_dense_tp_size, default=ServerArgs.moe_dense_tp_size,
help="TP size for MoE dense MLP layers. This flag is useful when, with large TP size, there are errors caused by weights in MLP layers having dimension smaller than the min dimension GEMM supports.", help="TP size for MoE dense MLP layers. This flag is useful when, with large TP size, there are errors caused by weights in MLP layers having dimension smaller than the min dimension GEMM supports.",
) )
# Mamba Cache # Mamba Cache
parser.add_argument( parser.add_argument(
"--max-mamba-cache-size", "--max-mamba-cache-size",
type=int, type=int,
default=ServerArgs.max_mamba_cache_size, default=ServerArgs.max_mamba_cache_size,
help="It is used for mamba cache memory static allocation.", help="The maximum size of the mamba cache.",
) )
parser.add_argument( parser.add_argument(
"--mamba-ssm-dtype", "--mamba-ssm-dtype",
type=str, type=str,
default=ServerArgs.mamba_ssm_dtype, default=ServerArgs.mamba_ssm_dtype,
choices=["float32", "bfloat16"], choices=["float32", "bfloat16"],
help="It is used to tune mamba ssm dtype", help="The data type of the SSM states in mamba cache.",
) )
# Hierarchical cache # Hierarchical cache
parser.add_argument( parser.add_argument(
"--enable-hierarchical-cache", "--enable-hierarchical-cache",
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment