Commit 04e1940d authored by 王敏
Browse files

[fix]解决w8a8+mtp启动失败问题

parent 45d85414
......@@ -928,6 +928,7 @@ class ModelConfig:
# imports during override detection (e.g., MXFP4 imports Triton)
"mxfp4",
"slimquant_w4a8_marlin",
"slimquant_marlin",
"slimquant_compressed_tensors_marlin",
]
quantization_methods = [
......
......@@ -38,6 +38,7 @@ QuantizationMethods = Literal[
"blockwise_int8",
"slimquant_w4a8",
"slimquant_w4a8_marlin",
"slimquant_marlin",
"slimquant_compressed_tensors_marlin",
]
QUANTIZATION_METHODS: list[str] = list(get_args(QuantizationMethods))
......@@ -154,6 +155,7 @@ def get_quantization_config(quantization: str) -> type[QuantizationConfig]:
"blockwise_int8": BlockInt8Config,
"slimquant_w4a8":SlimQuantW4A8Int8Config,
"slimquant_w4a8_marlin":SlimQuantW4A8Int8MarlinConfig,
"slimquant_marlin":SlimQuantCompressedTensorsMarlinConfig,
"slimquant_compressed_tensors_marlin":SlimQuantCompressedTensorsMarlinConfig,
}
# Update the `method_to_config` with customized quantization methods.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment