Commit 2bd176dd authored by zhouxiang's avatar zhouxiang
Browse files

添加numa绑定能力

parent 988eb4e6
...@@ -15,3 +15,4 @@ torch == 2.4.1 ...@@ -15,3 +15,4 @@ torch == 2.4.1
triton == 3.0.0 triton == 3.0.0
flash_attn == 2.6.1 flash_attn == 2.6.1
lmslim == 0.2.0 lmslim == 0.2.0
numa
\ No newline at end of file
...@@ -10,12 +10,39 @@ from vllm.utils import (get_distributed_init_method, get_ip, get_open_port, ...@@ -10,12 +10,39 @@ from vllm.utils import (get_distributed_init_method, get_ip, get_open_port,
make_async) make_async)
from vllm.worker.worker_base import WorkerBase, WorkerWrapperBase from vllm.worker.worker_base import WorkerBase, WorkerWrapperBase
import numa,os
# 设置当前进程绑定到 NUMA 节点
def bind_to_numa(local_rank):
    """Bind the current process to the NUMA node configured for ``local_rank``.

    The target node is read from the environment variable
    ``VLLM_RANK{local_rank}_NUMA``. When the variable is unset, negative, or
    not an integer, no binding is performed and a warning is logged.

    Args:
        local_rank: Rank identifier used to build the environment variable
            name.

    Raises:
        ValueError: If the configured node index is greater than the value
            reported by ``numa.get_max_node()``.
    """
    env_str = f"VLLM_RANK{local_rank}_NUMA"
    raw_value = os.getenv(env_str)

    # A missing variable maps to -1 (no binding), as before. A value that is
    # not an integer is a configuration mistake and is handled the same way
    # instead of letting int() abort the worker with an uncaught ValueError.
    # TODO: pick the node automatically from the hardware topology.
    try:
        numa_node = -1 if raw_value is None else int(raw_value)
    except ValueError:
        logger.warning(
            "%s is unset or set incorrectly, vllm will not bind to numa! "
            "%s = %s", env_str, env_str, raw_value)
        return

    if numa_node < 0:
        logger.warning("%s is unset or set incorrectly, vllm will not bind to numa! %s = %d", env_str, env_str, numa_node)
        return

    # Query the highest node index only once a binding is actually requested.
    max_node = numa.get_max_node()
    if numa_node > max_node:
        raise ValueError(f"NUMA node {numa_node} is not available.")
    numa.bind([numa_node])
logger = init_logger(__name__) logger = init_logger(__name__)
def create_worker(worker_module_name: str, worker_class_name: str, def create_worker(worker_module_name: str, worker_class_name: str,
worker_class_fn: Optional[Callable[[], Type[WorkerBase]]], worker_class_fn: Optional[Callable[[], Type[WorkerBase]]],
**kwargs): **kwargs):
VLLM_NUMA_BIND = int(os.getenv("VLLM_NUMA_BIND", 1))
if VLLM_NUMA_BIND > 0:
# 绑定当前进程到指定 NUMA 节点
bind_to_numa(kwargs['local_rank'])
pid = os.getpid()
logger.info("########## %d process(rank%s) is running on CPU(s): %s", pid, str(kwargs['local_rank']), str(os.sched_getaffinity(pid)))
logger.info("########## %d process(rank%s) is running on memnode(s): %s", pid, str(kwargs['local_rank']), str(numa.get_membind()))
wrapper = WorkerWrapperBase( wrapper = WorkerWrapperBase(
worker_module_name=worker_module_name, worker_module_name=worker_module_name,
worker_class_name=worker_class_name, worker_class_name=worker_class_name,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment