Unverified Commit 9dec582d authored by Lianmin Zheng, committed by GitHub

Remove --modelopt-config in server_args (#2758)

parent b01febdc
@@ -150,7 +150,6 @@ class ModelRunner:
                 "enable_nan_detection": server_args.enable_nan_detection,
                 "enable_dp_attention": server_args.enable_dp_attention,
                 "enable_ep_moe": server_args.enable_ep_moe,
-                "modelopt_config": server_args.modelopt_config,
             }
         )
...
@@ -149,7 +149,6 @@ class ServerArgs:
     torch_compile_max_bs: int = 32
     cuda_graph_max_bs: Optional[int] = None
     torchao_config: str = ""
-    modelopt_config: str = ""
     enable_nan_detection: bool = False
     enable_p2p_check: bool = False
     triton_attention_reduce_in_fp32: bool = False
@@ -810,12 +809,6 @@ class ServerArgs:
             default=ServerArgs.torchao_config,
             help="Optimize the model with torchao. Experimental feature. Current choices are: int8dq, int8wo, int4wo-<group_size>, fp8wo, fp8dq-per_tensor, fp8dq-per_row",
         )
-        parser.add_argument(
-            "--modelopt-config",
-            type=str,
-            default=ServerArgs.modelopt_config,
-            help="Optimize the model with nvidia-modelopt. Experimental feature. Current choices are: fp8",
-        )
         parser.add_argument(
             "--enable-nan-detection",
             action="store_true",
...
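For context, a minimal standalone sketch of the pattern the removed flag followed, assuming the general ServerArgs structure visible in the diff: a dataclass field supplies the CLI default and add_cli_args registers the matching argparse option, so removing a flag means deleting both the field and its parser.add_argument block. Names such as from_cli_args and the shortened help strings below are illustrative, not an exact copy of the repository code.

    import argparse
    import dataclasses

    @dataclasses.dataclass
    class ServerArgs:
        # Dataclass field supplies the CLI default (same pattern torchao_config uses).
        torchao_config: str = ""
        enable_nan_detection: bool = False

        @staticmethod
        def add_cli_args(parser: argparse.ArgumentParser):
            parser.add_argument(
                "--torchao-config",
                type=str,
                default=ServerArgs.torchao_config,
                help="Optimize the model with torchao. Experimental feature.",
            )
            parser.add_argument(
                "--enable-nan-detection",
                action="store_true",
                help="Enable NaN detection.",  # help text is illustrative
            )

        @classmethod
        def from_cli_args(cls, args: argparse.Namespace):
            # Copy every dataclass field back out of the parsed namespace.
            attrs = [f.name for f in dataclasses.fields(cls)]
            return cls(**{a: getattr(args, a) for a in attrs})

    parser = argparse.ArgumentParser()
    ServerArgs.add_cli_args(parser)
    args = parser.parse_args(["--torchao-config", "int8wo"])
    print(ServerArgs.from_cli_args(args).torchao_config)  # -> int8wo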