f"EP MoE is enabled. The expert parallel size is adjusted to be the same as the tensor parallel size[{self.tp_size}]."
)
# Others
ifself.enable_dp_attention:
self.dp_size=self.tp_size
...
...
@@ -229,12 +238,6 @@ class ServerArgs:
"Data parallel size is adjusted to be the same as tensor parallel size. "
"Overlap scheduler is disabled."
)
# Expert parallelism
# When EP MoE is enabled, the expert-parallel size is overwritten with the
# tensor-parallel size and the adjustment is logged.
if self.enable_ep_moe:
    self.ep_size = self.tp_size
    logger.info(
        f"EP MoE is enabled. The expert parallel size is adjusted to be the same as the tensor parallel size[{self.tp_size}]."
    )
# GGUF
if(
...
...
@@ -430,13 +433,18 @@ class ServerArgs:
default=ServerArgs.schedule_conservativeness,
help="How conservative the schedule policy is. A larger value means more conservative scheduling. Use a larger value if you see requests being retracted frequently.",
)
# --cpu-offload-gb: integer option whose default is taken from
# ServerArgs.cpu_offload_gb.
cpu_offload_options = {
    "type": int,
    "default": ServerArgs.cpu_offload_gb,
    "help": "How many GBs of RAM to reserve for CPU offloading",
}
parser.add_argument("--cpu-offload-gb", **cpu_offload_options)
def _parse_bool_arg(value):
    """Convert command-line text such as "true" or "0" into a bool.

    Raises:
        ValueError: if the text is not a recognized boolean spelling;
            argparse turns this into a usage error.
    """
    normalized = str(value).strip().lower()
    if normalized in ("1", "true", "t", "yes", "y", "on"):
        return True
    # The empty string stays False, matching what bool("") returned before.
    if normalized in ("0", "false", "f", "no", "n", "off", ""):
        return False
    raise ValueError(f"expected a boolean value, got {value!r}")

# `type=bool` would map every non-empty string (including "False" and "0")
# to True, so the option could never be switched off from the command line.
parser.add_argument(
    "--prefill-only-one-req",
    type=_parse_bool_arg,
    help="If true, we only prefill one request at one prefill batch",
    default=ServerArgs.prefill_only_one_req,
)
# Other runtime options
parser.add_argument(
...
...
@@ -555,6 +563,7 @@ class ServerArgs:
"shortest_queue",
],
)
# Expert parallelism
parser.add_argument(
"--expert-parallel-size",
...
...
@@ -777,28 +786,6 @@ class ServerArgs:
help="Delete the model checkpoint after loading the model.",
)
# Deprecated arguments
# Retired flags are all registered the same way: DeprecatedAction as the
# action, and a help message telling the user what to do instead.
for _flag, _notice in (
    (
        "--enable-overlap-schedule",
        "'--enable-overlap-schedule' is deprecated. It is enabled by default now. Please drop this argument.",
    ),
    (
        "--disable-flashinfer",
        "'--disable-flashinfer' is deprecated. Please use '--attention-backend triton' instead.",
    ),
):
    parser.add_argument(_flag, action=DeprecatedAction, help=_notice)
# Retired flag; the help message points users at the replacement option.
# The suggested backend name was previously misspelled as "pytroch".
parser.add_argument(
    "--disable-flashinfer-sampling",
    action=DeprecatedAction,
    help="'--disable-flashinfer-sampling' is deprecated. Please use '--sampling-backend pytorch' instead.",
)
parser.add_argument(
"--disable-disk-cache",
action=DeprecatedAction,
help="'--disable-disk-cache' is deprecated. Please use '--disable-outlines-disk-cache' instead.",