Commit 83c1f04a authored by zhuwenwen
Browse files

update VLLM_USE_CAT_MLA

parent 4b00d1ba
...@@ -1198,7 +1198,7 @@ environment_variables: dict[str, Callable[[], Any]] = { ...@@ -1198,7 +1198,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
# vllm will use fused cat and mla # vllm will use fused cat and mla
"VLLM_USE_CAT_MLA": "VLLM_USE_CAT_MLA":
lambda: (os.getenv('VLLM_USE_CAT_MLA', 'True').lower() in lambda: (os.getenv('VLLM_USE_CAT_MLA', 'False').lower() in
("true", "1")), ("true", "1")),
} }
......
...@@ -253,6 +253,8 @@ def get_model_architecture( ...@@ -253,6 +253,8 @@ def get_model_architecture(
os.environ['VLLM_USE_OPT_CAT'] = '1' os.environ['VLLM_USE_OPT_CAT'] = '1'
# if not envs.is_set("VLLM_USE_LIGHTOP_FILL_MOE_ALIGN"): # if not envs.is_set("VLLM_USE_LIGHTOP_FILL_MOE_ALIGN"):
# os.environ['VLLM_USE_LIGHTOP_FILL_MOE_ALIGN'] = '1' # os.environ['VLLM_USE_LIGHTOP_FILL_MOE_ALIGN'] = '1'
if not envs.is_set("VLLM_USE_CAT_MLA"):
os.environ['VLLM_USE_CAT_MLA'] = '1'
else: else:
if not envs.is_set("VLLM_USE_PD_SPLIT"): if not envs.is_set("VLLM_USE_PD_SPLIT"):
os.environ['VLLM_USE_PD_SPLIT'] = '1' os.environ['VLLM_USE_PD_SPLIT'] = '1'
...@@ -272,6 +274,8 @@ def get_model_architecture( ...@@ -272,6 +274,8 @@ def get_model_architecture(
os.environ['VLLM_USE_OPT_CAT'] = '1' os.environ['VLLM_USE_OPT_CAT'] = '1'
# if not envs.is_set("VLLM_USE_LIGHTOP_FILL_MOE_ALIGN"): # if not envs.is_set("VLLM_USE_LIGHTOP_FILL_MOE_ALIGN"):
# os.environ['VLLM_USE_LIGHTOP_FILL_MOE_ALIGN'] = '1' # os.environ['VLLM_USE_LIGHTOP_FILL_MOE_ALIGN'] = '1'
if not envs.is_set("VLLM_USE_CAT_MLA"):
os.environ['VLLM_USE_CAT_MLA'] = '1'
else: else:
if not envs.is_set("VLLM_USE_PD_SPLIT"): if not envs.is_set("VLLM_USE_PD_SPLIT"):
os.environ['VLLM_USE_PD_SPLIT'] = '1' os.environ['VLLM_USE_PD_SPLIT'] = '1'
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment