help="Optimize the model with torchao. Experimental feature. Current choices are: int8dq, int8wo, int4wo-<group_size>, fp8wo, fp8dq-per_tensor, fp8dq-per_row",
)
parser.add_argument(
"--modelopt-config",
type=str,
default=ServerArgs.modelopt_config,
help="Optimize the model with nvidia-modelopt. Experimental feature. Current choices are: fp8",