"src/git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "848f9fe6ce868c2fc4cd4bc6b9c27bb9d556e32c"
Unverified commit cd135bfe, authored by Ke Bao, committed by GitHub
Browse files

Update dsv3 quantization auto setting for sm100 (#12778)

parent fc84b073
...@@ -912,19 +912,32 @@ class ServerArgs: ...@@ -912,19 +912,32 @@ class ServerArgs:
logger.info( logger.info(
"Enable FlashInfer AllReduce Fusion on sm100 for DeepseekV3ForCausalLM" "Enable FlashInfer AllReduce Fusion on sm100 for DeepseekV3ForCausalLM"
) )
if self.moe_a2a_backend == "none" and self.moe_runner_backend == "auto": quantization_config = getattr(hf_config, "quantization_config", None)
self.moe_runner_backend = "flashinfer_trtllm" quant_method = (
logger.info( quantization_config.get("quant_method")
"Use flashinfer_trtllm as MoE runner backend on sm100 for DeepseekV3ForCausalLM" if quantization_config is not None
) else None
if self.quantization is None: )
# Default DeepSeek V3/R1 native FP8 when not explicitly set, if self.quantization is None:
# Because we need this condition for an assertion in # Default DeepSeek V3/R1 native FP8 when not explicitly set,
# flashinfer_trtllm MoE runner backend. # Because we need this condition for an assertion in
# flashinfer_trtllm MoE runner backend.
if quant_method is None:
self.quantization = "fp8" self.quantization = "fp8"
logger.info( logger.info(
"Quantization not specified, default to fp8 for DeepSeek on sm100" "Quantization not specified, default to fp8 for DeepSeek on sm100"
) )
else:
self.quantization = quant_method
if (
self.moe_a2a_backend == "none"
and self.moe_runner_backend == "auto"
and self.quantization in ["fp8", "modelopt_fp8", "modelopt_fp4"]
):
self.moe_runner_backend = "flashinfer_trtllm"
logger.info(
"Use flashinfer_trtllm as MoE runner backend on sm100 for DeepseekV3ForCausalLM"
)
elif model_arch in ["GptOssForCausalLM"]: elif model_arch in ["GptOssForCausalLM"]:
if ( if (
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment