Commit 813f81fb authored by zhuwenwen
Browse files

update VLLM_USE_LIGHTOP_FILL_MOE_ALIGN

parent 70f1c878
......@@ -179,7 +179,7 @@ if TYPE_CHECKING:
VLLM_SCHED_ENABLE_MINIMAL_INJECTION: bool = False
VLLM_USE_PD_SPLIT: bool = False
VLLM_USE_PP_SYNC: bool = False
VLLM_USE_LIGHTOP_FILL_MOE_ALIN: bool = False
VLLM_USE_LIGHTOP_FILL_MOE_ALIGN: bool = False
def get_default_cache_root():
return os.getenv(
......@@ -1163,8 +1163,8 @@ environment_variables: dict[str, Callable[[], Any]] = {
lambda: (os.environ.get("VLLM_USE_PP_SYNC", "False").lower() in
("true", "1")),
# vLLM will use lightop to fuse fill and moe align
"VLLM_USE_LIGHTOP_FILL_MOE_ALIN":
lambda: (os.environ.get("VLLM_USE_LIGHTOP_FILL_MOE_ALIN", "False").lower() in
"VLLM_USE_LIGHTOP_FILL_MOE_ALIGN":
lambda: (os.environ.get("VLLM_USE_LIGHTOP_FILL_MOE_ALIGN", "False").lower() in
("true", "1")),
}
......
......@@ -216,7 +216,7 @@ def moe_align_block_size(
sorted_ids = torch.empty((max_num_tokens_padded, ),
dtype=torch.int32,
device=topk_ids.device)
if not envs.VLLM_USE_LIGHTOP_FILL_MOE_ALIN:
if not envs.VLLM_USE_LIGHTOP_FILL_MOE_ALIGN:
sorted_ids.fill_(topk_ids.numel())
max_num_m_blocks = triton.cdiv(max_num_tokens_padded, block_size)
......
......@@ -251,8 +251,8 @@ def get_model_architecture(
os.environ['VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD'] = '1'
if not envs.is_set("VLLM_USE_OPT_CAT"):
os.environ['VLLM_USE_OPT_CAT'] = '1'
# if not envs.is_set("VLLM_USE_LIGHTOP_FILL_MOE_ALIN"):
# os.environ['VLLM_USE_LIGHTOP_FILL_MOE_ALIN'] = '1'
if not envs.is_set("VLLM_USE_LIGHTOP_FILL_MOE_ALIGN"):
os.environ['VLLM_USE_LIGHTOP_FILL_MOE_ALIGN'] = '1'
if os.getenv('GEMM_PAD') != '1':
os.environ['GEMM_PAD'] = '0'
......@@ -266,8 +266,8 @@ def get_model_architecture(
os.environ['VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD'] = '1'
if not envs.is_set("VLLM_USE_OPT_CAT"):
os.environ['VLLM_USE_OPT_CAT'] = '1'
# if not envs.is_set("VLLM_USE_LIGHTOP_FILL_MOE_ALIN"):
# os.environ['VLLM_USE_LIGHTOP_FILL_MOE_ALIN'] = '1'
if not envs.is_set("VLLM_USE_LIGHTOP_FILL_MOE_ALIGN"):
os.environ['VLLM_USE_LIGHTOP_FILL_MOE_ALIGN'] = '1'
# AWQ-related configuration
try:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment