Commit ce755d66 authored by zhuwenwen's avatar zhuwenwen
Browse files

Merge branch 'v0.9.2-dev-zero-mtp' into 'v0.9.2-dev'

feat: pp mtp加入零消耗调度,加入环境变量VLLM_USE_ZERO_MTP,默认打开

See merge request dcutoolkit/deeplearing/vllm!264
parents d126ce21 c50f084a
...@@ -182,6 +182,7 @@ if TYPE_CHECKING: ...@@ -182,6 +182,7 @@ if TYPE_CHECKING:
VLLM_USE_LIGHTOP_FILL_MOE_ALIGN: bool = False VLLM_USE_LIGHTOP_FILL_MOE_ALIGN: bool = False
USE_FUSED_CUSTOM_ALL_REDUCE_RMS_QUANT: bool = False USE_FUSED_CUSTOM_ALL_REDUCE_RMS_QUANT: bool = False
VLLM_USE_PP_BALANCE: bool = False VLLM_USE_PP_BALANCE: bool = False
VLLM_USE_ZERO_MTP: bool = False
VLLM_USE_CUDA_GRAPH_SIZES: bool = False VLLM_USE_CUDA_GRAPH_SIZES: bool = False
def get_default_cache_root(): def get_default_cache_root():
...@@ -1186,6 +1187,10 @@ environment_variables: dict[str, Callable[[], Any]] = { ...@@ -1186,6 +1187,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
lambda: (os.getenv('VLLM_USE_PP_BALANCE', '1').lower() in lambda: (os.getenv('VLLM_USE_PP_BALANCE', '1').lower() in
("true", "1")), ("true", "1")),
"VLLM_USE_ZERO_MTP":
lambda: (os.getenv('VLLM_USE_ZERO_MTP', '1').lower() in
("true", "1")),
# vllm will use 1-18... (not only 1 2 4 8 16) # vllm will use 1-18... (not only 1 2 4 8 16)
"VLLM_USE_CUDA_GRAPH_SIZES": "VLLM_USE_CUDA_GRAPH_SIZES":
lambda: (os.getenv('VLLM_USE_CUDA_GRAPH_SIZES', 'False').lower() in lambda: (os.getenv('VLLM_USE_CUDA_GRAPH_SIZES', 'False').lower() in
......
This diff is collapsed.
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment