Commit 2b47bce9 authored by zhuwenwen
Browse files

Enable VLLM_USE_CUDA_GRAPH_SIZES by default (=1) to use sizes 1-24... (not only 1 2 4 8 16)

Set the defaults of VLLM_USE_LIGHTOP_FILL_MOE_ALIGN, VLLM_USE_OPT_ZEROS and VLLM_USE_PP_SYNC to 1
parent ce755d66
......@@ -4766,7 +4766,7 @@ class VllmConfig:
i for i in range(8, cuda_graph_sizes[0] + 1, 8)
]
else:
batch_size_capture_list = list(range(1, 19)) + [24, 32] + [
batch_size_capture_list = list(range(1, 25)) + [32] + [
i for i in range(40, cuda_graph_sizes[0] + 1, 8)
]
elif len(cuda_graph_sizes) > 1:
......
......@@ -1114,7 +1114,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
("true", "1")),
# vLLM will use elementwise not triton_
"VLLM_USE_OPT_ZEROS":
lambda: (os.environ.get("VLLM_USE_OPT_ZEROS", "False").lower() in
lambda: (os.environ.get("VLLM_USE_OPT_ZEROS", "True").lower() in
("true", "1")),
# vLLM will use opt cat for deepseek-v3
"VLLM_USE_OPT_CAT":
......@@ -1170,12 +1170,12 @@ environment_variables: dict[str, Callable[[], Any]] = {
# vLLM will sync to avoid pp vmfault
"VLLM_USE_PP_SYNC":
lambda: (os.environ.get("VLLM_USE_PP_SYNC", "False").lower() in
lambda: (os.environ.get("VLLM_USE_PP_SYNC", "True").lower() in
("true", "1")),
# vLLM will use lightop to fuse fill and moe align
"VLLM_USE_LIGHTOP_FILL_MOE_ALIGN":
lambda: (os.environ.get("VLLM_USE_LIGHTOP_FILL_MOE_ALIGN", "False").lower() in
lambda: (os.environ.get("VLLM_USE_LIGHTOP_FILL_MOE_ALIGN", "True").lower() in
("true", "1")),
# vllm will use custom-allreduce rmsquant fused op
......@@ -1191,9 +1191,9 @@ environment_variables: dict[str, Callable[[], Any]] = {
lambda: (os.getenv('VLLM_USE_ZERO_MTP', '1').lower() in
("true", "1")),
# vllm will use 1-18... (not only 1 2 4 8 16)
# vllm will use 1-24... (not only 1 2 4 8 16 24)
"VLLM_USE_CUDA_GRAPH_SIZES":
lambda: (os.getenv('VLLM_USE_CUDA_GRAPH_SIZES', 'False').lower() in
lambda: (os.getenv('VLLM_USE_CUDA_GRAPH_SIZES', 'True').lower() in
("true", "1")),
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment