Commit d95de221 authored by zhuwenwen
Browse files

Fix pipeline-parallel (pp) error and update the min_block_size default

parent 083ff6ec
......@@ -45,12 +45,6 @@ class ReqMeta:
block_ids=block_ids_tensor,
num_tokens=len(token_ids),
)
self.parallel_config = vllm_config.parallel_config
self.model_config = vllm_config.model_config
self.total_num_hidden_layers = getattr(self.model_config.hf_text_config,
"num_hidden_layers", 0)
self.pp_size = self.parallel_config.pipeline_parallel_size
@dataclass
......@@ -92,6 +86,12 @@ class P2pNcclConnector(KVConnectorBase_V1):
hostname="",
port_offset=self._rank,
) if role == KVConnectorRole.WORKER else None
self.parallel_config = vllm_config.parallel_config
self.model_config = vllm_config.model_config
self.total_num_hidden_layers = getattr(self.model_config.hf_text_config,
"num_hidden_layers", 0)
self.pp_size = self.parallel_config.pipeline_parallel_size
# ==============================
# Worker-side methods
......
......@@ -63,7 +63,7 @@ class TensorMemoryPool:
than min_block_size
"""
def __init__(self, max_block_size: int, min_block_size: int = 512):
def __init__(self, max_block_size: int, min_block_size: int = 128):
if max_block_size <= 0 or min_block_size <= 0:
raise ValueError("Block sizes must be positive")
if max_block_size < min_block_size:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment