Commit 90fc8f66 authored by zhuwenwen
Browse files

update VLLM_USE_PD_SPLIT=0

parent d95a8fff
...@@ -1157,7 +1157,7 @@ environment_variables: dict[str, Callable[[], Any]] = { ...@@ -1157,7 +1157,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
("true", "1")), ("true", "1")),
# vLLM will split prefill and decode, not mix up # vLLM will split prefill and decode, not mix up
"VLLM_USE_PD_SPLIT": "VLLM_USE_PD_SPLIT":
lambda: (os.environ.get("VLLM_USE_PD_SPLIT", "True").lower() in lambda: (os.environ.get("VLLM_USE_PD_SPLIT", "False").lower() in
("true", "1")), ("true", "1")),
# vLLM will sync to avoid pp vmfault # vLLM will sync to avoid pp vmfault
"VLLM_USE_PP_SYNC": "VLLM_USE_PP_SYNC":
......
...@@ -253,8 +253,9 @@ def get_model_architecture( ...@@ -253,8 +253,9 @@ def get_model_architecture(
os.environ['VLLM_USE_OPT_CAT'] = '1' os.environ['VLLM_USE_OPT_CAT'] = '1'
# if not envs.is_set("VLLM_USE_LIGHTOP_FILL_MOE_ALIGN"): # if not envs.is_set("VLLM_USE_LIGHTOP_FILL_MOE_ALIGN"):
# os.environ['VLLM_USE_LIGHTOP_FILL_MOE_ALIGN'] = '1' # os.environ['VLLM_USE_LIGHTOP_FILL_MOE_ALIGN'] = '1'
else:
if not envs.is_set("VLLM_USE_PD_SPLIT"): if not envs.is_set("VLLM_USE_PD_SPLIT"):
os.environ['VLLM_USE_PD_SPLIT'] = '0' os.environ['VLLM_USE_PD_SPLIT'] = '1'
if os.getenv('GEMM_PAD') != '1': if os.getenv('GEMM_PAD') != '1':
...@@ -271,8 +272,9 @@ def get_model_architecture( ...@@ -271,8 +272,9 @@ def get_model_architecture(
os.environ['VLLM_USE_OPT_CAT'] = '1' os.environ['VLLM_USE_OPT_CAT'] = '1'
# if not envs.is_set("VLLM_USE_LIGHTOP_FILL_MOE_ALIGN"): # if not envs.is_set("VLLM_USE_LIGHTOP_FILL_MOE_ALIGN"):
# os.environ['VLLM_USE_LIGHTOP_FILL_MOE_ALIGN'] = '1' # os.environ['VLLM_USE_LIGHTOP_FILL_MOE_ALIGN'] = '1'
else:
if not envs.is_set("VLLM_USE_PD_SPLIT"): if not envs.is_set("VLLM_USE_PD_SPLIT"):
os.environ['VLLM_USE_PD_SPLIT'] = '0' os.environ['VLLM_USE_PD_SPLIT'] = '1'
# awq相关配置 # awq相关配置
try: try:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment