Commit 8548cf87 authored by zhuwenwen's avatar zhuwenwen
Browse files

set VLLM_USE_LIGHTOP_FILL_MOE_ALIGN=0 when use dpsk-v3 non-quantitative

parent a1314cac
...@@ -251,8 +251,6 @@ def get_model_architecture( ...@@ -251,8 +251,6 @@ def get_model_architecture(
os.environ['VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD'] = '1' os.environ['VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD'] = '1'
if not envs.is_set("VLLM_USE_OPT_CAT"): if not envs.is_set("VLLM_USE_OPT_CAT"):
os.environ['VLLM_USE_OPT_CAT'] = '1' os.environ['VLLM_USE_OPT_CAT'] = '1'
if not envs.is_set("VLLM_USE_LIGHTOP_FILL_MOE_ALIGN"):
os.environ['VLLM_USE_LIGHTOP_FILL_MOE_ALIGN'] = '1'
if not envs.is_set("VLLM_USE_CAT_MLA"): if not envs.is_set("VLLM_USE_CAT_MLA"):
os.environ['VLLM_USE_CAT_MLA'] = '1' os.environ['VLLM_USE_CAT_MLA'] = '1'
if not envs.is_set("VLLM_REJECT_SAMPLE_OPT"): if not envs.is_set("VLLM_REJECT_SAMPLE_OPT"):
...@@ -266,6 +264,8 @@ def get_model_architecture( ...@@ -266,6 +264,8 @@ def get_model_architecture(
os.environ['USE_FUSED_RMS_QUANT'] = '1' os.environ['USE_FUSED_RMS_QUANT'] = '1'
if not envs.is_set("USE_FUSED_SILU_MUL_QUANT"): if not envs.is_set("USE_FUSED_SILU_MUL_QUANT"):
os.environ['USE_FUSED_SILU_MUL_QUANT'] = '1' os.environ['USE_FUSED_SILU_MUL_QUANT'] = '1'
if not envs.is_set("VLLM_USE_LIGHTOP_FILL_MOE_ALIGN"):
os.environ['VLLM_USE_LIGHTOP_FILL_MOE_ALIGN'] = '1'
else: else:
if not envs.is_set("VLLM_USE_PD_SPLIT"): if not envs.is_set("VLLM_USE_PD_SPLIT"):
os.environ['VLLM_USE_PD_SPLIT'] = '1' os.environ['VLLM_USE_PD_SPLIT'] = '1'
...@@ -296,8 +296,6 @@ def get_model_architecture( ...@@ -296,8 +296,6 @@ def get_model_architecture(
os.environ['VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD'] = '1' os.environ['VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD'] = '1'
if not envs.is_set("VLLM_USE_OPT_CAT"): if not envs.is_set("VLLM_USE_OPT_CAT"):
os.environ['VLLM_USE_OPT_CAT'] = '1' os.environ['VLLM_USE_OPT_CAT'] = '1'
if not envs.is_set("VLLM_USE_LIGHTOP_FILL_MOE_ALIGN"):
os.environ['VLLM_USE_LIGHTOP_FILL_MOE_ALIGN'] = '1'
if not envs.is_set("VLLM_USE_CAT_MLA"): if not envs.is_set("VLLM_USE_CAT_MLA"):
os.environ['VLLM_USE_CAT_MLA'] = '1' os.environ['VLLM_USE_CAT_MLA'] = '1'
if not envs.is_set("VLLM_REJECT_SAMPLE_OPT"): if not envs.is_set("VLLM_REJECT_SAMPLE_OPT"):
...@@ -307,11 +305,12 @@ def get_model_architecture( ...@@ -307,11 +305,12 @@ def get_model_architecture(
if not envs.is_set("VLLM_SCHED_ENABLE_MINIMAL_INJECTION"): if not envs.is_set("VLLM_SCHED_ENABLE_MINIMAL_INJECTION"):
os.environ['VLLM_SCHED_ENABLE_MINIMAL_INJECTION'] = '1' os.environ['VLLM_SCHED_ENABLE_MINIMAL_INJECTION'] = '1'
if model_config.quantization in {"slimquant_w4a8", "slimquant_w4a8_marlin", "slimquant_compressed_tensors_marlin", "compressed-tensors"}: if model_config.quantization in {"slimquant_w4a8", "slimquant_w4a8_marlin", "slimquant_compressed_tensors_marlin", "compressed-tensors"}:
if not envs.is_set("USE_FUSED_RMS_QUANT"): if not envs.is_set("USE_FUSED_RMS_QUANT"):
os.environ['USE_FUSED_RMS_QUANT'] = '1' os.environ['USE_FUSED_RMS_QUANT'] = '1'
if not envs.is_set("USE_FUSED_SILU_MUL_QUANT"): if not envs.is_set("USE_FUSED_SILU_MUL_QUANT"):
os.environ['USE_FUSED_SILU_MUL_QUANT'] = '1' os.environ['USE_FUSED_SILU_MUL_QUANT'] = '1'
if not envs.is_set("VLLM_USE_LIGHTOP_FILL_MOE_ALIGN"):
os.environ['VLLM_USE_LIGHTOP_FILL_MOE_ALIGN'] = '1'
else: else:
if not envs.is_set("VLLM_USE_PD_SPLIT"): if not envs.is_set("VLLM_USE_PD_SPLIT"):
os.environ['VLLM_USE_PD_SPLIT'] = '1' os.environ['VLLM_USE_PD_SPLIT'] = '1'
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment