Commit c0e0e7cd authored by zhuwenwen
Browse files

Default USE_FUSED_RMS_QUANT=1 and USE_FUSED_SILU_MUL_QUANT=1 only for quantized dpsk-v3 models

parent 67f4b1b4
...@@ -259,12 +259,13 @@ def get_model_architecture( ...@@ -259,12 +259,13 @@ def get_model_architecture(
os.environ['VLLM_REJECT_SAMPLE_OPT'] = '1' os.environ['VLLM_REJECT_SAMPLE_OPT'] = '1'
# if not envs.is_set("VLLM_USE_LIGHTOP_RMS_ROPE_CONCAT"): # if not envs.is_set("VLLM_USE_LIGHTOP_RMS_ROPE_CONCAT"):
# os.environ['VLLM_USE_LIGHTOP_RMS_ROPE_CONCAT'] = '1' # os.environ['VLLM_USE_LIGHTOP_RMS_ROPE_CONCAT'] = '1'
if not envs.is_set("VLLM_SCHED_ENABLE_MINIMAL_INJECTION"):
os.environ['VLLM_SCHED_ENABLE_MINIMAL_INJECTION'] = '1'
if model_config.quantization is not None:
if not envs.is_set("USE_FUSED_RMS_QUANT"): if not envs.is_set("USE_FUSED_RMS_QUANT"):
os.environ['USE_FUSED_RMS_QUANT'] = '1' os.environ['USE_FUSED_RMS_QUANT'] = '1'
if not envs.is_set("USE_FUSED_SILU_MUL_QUANT"): if not envs.is_set("USE_FUSED_SILU_MUL_QUANT"):
os.environ['USE_FUSED_SILU_MUL_QUANT'] = '1' os.environ['USE_FUSED_SILU_MUL_QUANT'] = '1'
if not envs.is_set("VLLM_SCHED_ENABLE_MINIMAL_INJECTION"):
os.environ['VLLM_SCHED_ENABLE_MINIMAL_INJECTION'] = '1'
else: else:
if not envs.is_set("VLLM_USE_PD_SPLIT"): if not envs.is_set("VLLM_USE_PD_SPLIT"):
os.environ['VLLM_USE_PD_SPLIT'] = '1' os.environ['VLLM_USE_PD_SPLIT'] = '1'
...@@ -303,12 +304,13 @@ def get_model_architecture( ...@@ -303,12 +304,13 @@ def get_model_architecture(
os.environ['VLLM_REJECT_SAMPLE_OPT'] = '1' os.environ['VLLM_REJECT_SAMPLE_OPT'] = '1'
# if not envs.is_set("VLLM_USE_LIGHTOP_RMS_ROPE_CONCAT"): # if not envs.is_set("VLLM_USE_LIGHTOP_RMS_ROPE_CONCAT"):
# os.environ['VLLM_USE_LIGHTOP_RMS_ROPE_CONCAT'] = '1' # os.environ['VLLM_USE_LIGHTOP_RMS_ROPE_CONCAT'] = '1'
if not envs.is_set("VLLM_SCHED_ENABLE_MINIMAL_INJECTION"):
os.environ['VLLM_SCHED_ENABLE_MINIMAL_INJECTION'] = '1'
if model_config.quantization is not None:
if not envs.is_set("USE_FUSED_RMS_QUANT"): if not envs.is_set("USE_FUSED_RMS_QUANT"):
os.environ['USE_FUSED_RMS_QUANT'] = '1' os.environ['USE_FUSED_RMS_QUANT'] = '1'
if not envs.is_set("USE_FUSED_SILU_MUL_QUANT"): if not envs.is_set("USE_FUSED_SILU_MUL_QUANT"):
os.environ['USE_FUSED_SILU_MUL_QUANT'] = '1' os.environ['USE_FUSED_SILU_MUL_QUANT'] = '1'
if not envs.is_set("VLLM_SCHED_ENABLE_MINIMAL_INJECTION"):
os.environ['VLLM_SCHED_ENABLE_MINIMAL_INJECTION'] = '1'
else: else:
if not envs.is_set("VLLM_USE_PD_SPLIT"): if not envs.is_set("VLLM_USE_PD_SPLIT"):
os.environ['VLLM_USE_PD_SPLIT'] = '1' os.environ['VLLM_USE_PD_SPLIT'] = '1'
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment