Commit acf9f945 authored by zhuwenwen's avatar zhuwenwen
Browse files

修复pd分离开启cp引起的校验问题

update VLLM_USE_PD_SPLIT=0 (for dspk) and VLLM_USE_PD_SPLIT=1 (for others)
parent cf975626
...@@ -481,7 +481,9 @@ class P2pNcclConnector(KVConnectorBase_V1): ...@@ -481,7 +481,9 @@ class P2pNcclConnector(KVConnectorBase_V1):
num_scheduled_tokens = ( num_scheduled_tokens = (
scheduler_output.num_scheduled_tokens)[req_id] scheduler_output.num_scheduled_tokens)[req_id]
num_tokens = (num_scheduled_tokens + num_computed_tokens) num_tokens = (num_scheduled_tokens + num_computed_tokens)
assert req_id in self.chunked_prefill # assert req_id in self.chunked_prefill
if req_id not in self.chunked_prefill:
continue
block_ids = new_block_ids[0] block_ids = new_block_ids[0]
if not resumed_from_preemption: if not resumed_from_preemption:
block_ids = (self.chunked_prefill[req_id][0] + block_ids) block_ids = (self.chunked_prefill[req_id][0] + block_ids)
......
...@@ -1642,7 +1642,7 @@ environment_variables: dict[str, Callable[[], Any]] = { ...@@ -1642,7 +1642,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
("true", "1")), ("true", "1")),
# vLLM will split prefill and decode, not mix up # vLLM will split prefill and decode, not mix up
"VLLM_USE_PD_SPLIT": "VLLM_USE_PD_SPLIT":
lambda: (os.environ.get("VLLM_USE_PD_SPLIT", "True").lower() in lambda: (os.environ.get("VLLM_USE_PD_SPLIT", "False").lower() in
("true", "1")), ("true", "1")),
# vLLM will sync to avoid pp vmfault # vLLM will sync to avoid pp vmfault
"VLLM_USE_PP_SYNC": "VLLM_USE_PP_SYNC":
......
...@@ -198,6 +198,9 @@ def _get_model_architecture( ...@@ -198,6 +198,9 @@ def _get_model_architecture(
os.environ['VLLM_USE_LIGHTOP'] = '1' os.environ['VLLM_USE_LIGHTOP'] = '1'
if not envs.is_set("VLLM_USE_OPT_CAT"): if not envs.is_set("VLLM_USE_OPT_CAT"):
os.environ['VLLM_USE_OPT_CAT'] = '1' os.environ['VLLM_USE_OPT_CAT'] = '1'
else:
if not envs.is_set("VLLM_USE_PD_SPLIT"):
os.environ['VLLM_USE_PD_SPLIT'] = '1'
if os.getenv('GEMM_PAD') != '1': if os.getenv('GEMM_PAD') != '1':
os.environ['GEMM_PAD'] = '0' os.environ['GEMM_PAD'] = '0'
...@@ -209,6 +212,9 @@ def _get_model_architecture( ...@@ -209,6 +212,9 @@ def _get_model_architecture(
os.environ['VLLM_USE_LIGHTOP'] = '1' os.environ['VLLM_USE_LIGHTOP'] = '1'
if not envs.is_set("VLLM_USE_OPT_CAT"): if not envs.is_set("VLLM_USE_OPT_CAT"):
os.environ['VLLM_USE_OPT_CAT'] = '1' os.environ['VLLM_USE_OPT_CAT'] = '1'
else:
if not envs.is_set("VLLM_USE_PD_SPLIT"):
os.environ['VLLM_USE_PD_SPLIT'] = '1'
# awq相关配置 # awq相关配置
try: try:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment