Commit d1fd831b authored by 王敏
Browse files

创建pcp分支

parent 2a79e7d5
......@@ -1186,7 +1186,8 @@ class VllmConfig:
if (
self.parallel_config.tensor_parallel_size > 1
and self.compilation_config.pass_config.enable_sp
and (self.compilation_config.pass_config.enable_sp
or envs.VLLM_MLA_CP)
):
cudagraph_capture_sizes = self.update_sizes_for_sequence_parallelism(
cudagraph_capture_sizes
......
......@@ -323,6 +323,10 @@ if TYPE_CHECKING:
USE_LIGHTOP_PER_TOKEN_GROUP_QUANT_FP8: bool = False
USE_LIGHTOP_TOPK: bool = False
USE_LIGHTOP_CONVERT_REQ_INDEX_TO_GLOBAL_INDEX: bool = False
VLLM_MLA_CP: bool = False
VLLM_MLA_CPLB: bool = False
def get_default_cache_root():
return os.getenv(
"XDG_CACHE_HOME",
......@@ -2001,7 +2005,15 @@ environment_variables: dict[str, Callable[[], Any]] = {
("true", "1")),
"USE_LIGHTOP_CONVERT_REQ_INDEX_TO_GLOBAL_INDEX":
lambda: (os.environ.get("USE_LIGHTOP_CONVERT_REQ_INDEX_TO_GLOBAL_INDEX", "False").lower() in
("true", "1")),
("true", "1")),
# If set to 1/True, enable MLA context parallelism
"VLLM_MLA_CP":
lambda: (os.environ.get("VLLM_MLA_CP", "False").lower() in
("true", "1")),
"VLLM_MLA_CPLB":
lambda: (os.environ.get("VLLM_MLA_CPLB", "False").lower() in
("true", "1")),
}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment