Commit 04e1940d authored by 王敏
Browse files

[fix]解决w8a8+mtp启动失败问题

parent 45d85414
...@@ -928,6 +928,7 @@ class ModelConfig: ...@@ -928,6 +928,7 @@ class ModelConfig:
# imports during override detection (e.g., MXFP4 imports Triton) # imports during override detection (e.g., MXFP4 imports Triton)
"mxfp4", "mxfp4",
"slimquant_w4a8_marlin", "slimquant_w4a8_marlin",
"slimquant_marlin",
"slimquant_compressed_tensors_marlin", "slimquant_compressed_tensors_marlin",
] ]
quantization_methods = [ quantization_methods = [
......
...@@ -38,6 +38,7 @@ QuantizationMethods = Literal[ ...@@ -38,6 +38,7 @@ QuantizationMethods = Literal[
"blockwise_int8", "blockwise_int8",
"slimquant_w4a8", "slimquant_w4a8",
"slimquant_w4a8_marlin", "slimquant_w4a8_marlin",
"slimquant_marlin",
"slimquant_compressed_tensors_marlin", "slimquant_compressed_tensors_marlin",
] ]
QUANTIZATION_METHODS: list[str] = list(get_args(QuantizationMethods)) QUANTIZATION_METHODS: list[str] = list(get_args(QuantizationMethods))
...@@ -154,6 +155,7 @@ def get_quantization_config(quantization: str) -> type[QuantizationConfig]: ...@@ -154,6 +155,7 @@ def get_quantization_config(quantization: str) -> type[QuantizationConfig]:
"blockwise_int8": BlockInt8Config, "blockwise_int8": BlockInt8Config,
"slimquant_w4a8":SlimQuantW4A8Int8Config, "slimquant_w4a8":SlimQuantW4A8Int8Config,
"slimquant_w4a8_marlin":SlimQuantW4A8Int8MarlinConfig, "slimquant_w4a8_marlin":SlimQuantW4A8Int8MarlinConfig,
"slimquant_marlin":SlimQuantCompressedTensorsMarlinConfig,
"slimquant_compressed_tensors_marlin":SlimQuantCompressedTensorsMarlinConfig, "slimquant_compressed_tensors_marlin":SlimQuantCompressedTensorsMarlinConfig,
} }
# Update the `method_to_config` with customized quantization methods. # Update the `method_to_config` with customized quantization methods.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment