Commit d1fd831b authored by 王敏
Browse files

创建pcp分支

parent 2a79e7d5
...@@ -1186,7 +1186,8 @@ class VllmConfig: ...@@ -1186,7 +1186,8 @@ class VllmConfig:
if ( if (
self.parallel_config.tensor_parallel_size > 1 self.parallel_config.tensor_parallel_size > 1
and self.compilation_config.pass_config.enable_sp and (self.compilation_config.pass_config.enable_sp
or envs.VLLM_MLA_CP)
): ):
cudagraph_capture_sizes = self.update_sizes_for_sequence_parallelism( cudagraph_capture_sizes = self.update_sizes_for_sequence_parallelism(
cudagraph_capture_sizes cudagraph_capture_sizes
......
...@@ -323,6 +323,10 @@ if TYPE_CHECKING: ...@@ -323,6 +323,10 @@ if TYPE_CHECKING:
USE_LIGHTOP_PER_TOKEN_GROUP_QUANT_FP8: bool = False USE_LIGHTOP_PER_TOKEN_GROUP_QUANT_FP8: bool = False
USE_LIGHTOP_TOPK: bool = False USE_LIGHTOP_TOPK: bool = False
USE_LIGHTOP_CONVERT_REQ_INDEX_TO_GLOBAL_INDEX: bool = False USE_LIGHTOP_CONVERT_REQ_INDEX_TO_GLOBAL_INDEX: bool = False
VLLM_MLA_CP: bool = False
VLLM_MLA_CPLB: bool = False
def get_default_cache_root(): def get_default_cache_root():
return os.getenv( return os.getenv(
"XDG_CACHE_HOME", "XDG_CACHE_HOME",
...@@ -2003,6 +2007,14 @@ environment_variables: dict[str, Callable[[], Any]] = { ...@@ -2003,6 +2007,14 @@ environment_variables: dict[str, Callable[[], Any]] = {
lambda: (os.environ.get("USE_LIGHTOP_CONVERT_REQ_INDEX_TO_GLOBAL_INDEX", "False").lower() in lambda: (os.environ.get("USE_LIGHTOP_CONVERT_REQ_INDEX_TO_GLOBAL_INDEX", "False").lower() in
("true", "1")), ("true", "1")),
# If set to 1/True, enable MLA context parallelism
"VLLM_MLA_CP":
lambda: (os.environ.get("VLLM_MLA_CP", "False").lower() in
("true", "1")),
"VLLM_MLA_CPLB":
lambda: (os.environ.get("VLLM_MLA_CPLB", "False").lower() in
("true", "1")),
} }
# --8<-- [end:env-vars-definition] # --8<-- [end:env-vars-definition]
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment