Unverified Commit 7a6c8c3f authored by iAmir97's avatar iAmir97 Committed by GitHub
Browse files

[Chore] Separate out `vllm.utils.network_utils` (#27164)


Signed-off-by: default avatariAmir97 <Amir.balwel@embeddedllm.com>
Co-authored-by: default avatariAmir97 <Amir.balwel@embeddedllm.com>
parent 221bf725
...@@ -49,10 +49,8 @@ from vllm.distributed.device_communicators.base_device_communicator import ( ...@@ -49,10 +49,8 @@ from vllm.distributed.device_communicators.base_device_communicator import (
) )
from vllm.distributed.utils import StatelessProcessGroup from vllm.distributed.utils import StatelessProcessGroup
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.utils import (
get_distributed_init_method,
)
from vllm.utils.import_utils import resolve_obj_by_qualname from vllm.utils.import_utils import resolve_obj_by_qualname
from vllm.utils.network_utils import get_distributed_init_method
from vllm.utils.torch_utils import ( from vllm.utils.torch_utils import (
direct_register_custom_op, direct_register_custom_op,
supports_custom_op, supports_custom_op,
......
...@@ -29,7 +29,7 @@ from torch.distributed.rendezvous import rendezvous ...@@ -29,7 +29,7 @@ from torch.distributed.rendezvous import rendezvous
import vllm.envs as envs import vllm.envs as envs
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.utils import get_tcp_uri from vllm.utils.network_utils import get_tcp_uri
from vllm.utils.torch_utils import is_torch_equal_or_newer from vllm.utils.torch_utils import is_torch_equal_or_newer
logger = init_logger(__name__) logger = init_logger(__name__)
......
...@@ -81,8 +81,9 @@ from vllm.transformers_utils.config import ( ...@@ -81,8 +81,9 @@ from vllm.transformers_utils.config import (
maybe_override_with_speculators, maybe_override_with_speculators,
) )
from vllm.transformers_utils.utils import check_gguf_file from vllm.transformers_utils.utils import check_gguf_file
from vllm.utils import FlexibleArgumentParser, get_ip, is_in_ray_actor from vllm.utils import FlexibleArgumentParser, is_in_ray_actor
from vllm.utils.mem_constants import GiB_bytes from vllm.utils.mem_constants import GiB_bytes
from vllm.utils.network_utils import get_ip
from vllm.v1.sample.logits_processor import LogitsProcessor from vllm.v1.sample.logits_processor import LogitsProcessor
if TYPE_CHECKING: if TYPE_CHECKING:
......
...@@ -21,9 +21,9 @@ from vllm.usage.usage_lib import UsageContext ...@@ -21,9 +21,9 @@ from vllm.usage.usage_lib import UsageContext
from vllm.utils import ( from vllm.utils import (
FlexibleArgumentParser, FlexibleArgumentParser,
decorate_logs, decorate_logs,
get_tcp_uri,
set_process_title, set_process_title,
) )
from vllm.utils.network_utils import get_tcp_uri
from vllm.v1.engine.core import EngineCoreProc from vllm.v1.engine.core import EngineCoreProc
from vllm.v1.engine.utils import CoreEngineProcManager, launch_core_engines from vllm.v1.engine.utils import CoreEngineProcManager, launch_core_engines
from vllm.v1.executor.abstract import Executor from vllm.v1.executor.abstract import Executor
......
...@@ -18,7 +18,7 @@ from vllm.entrypoints.constants import ( ...@@ -18,7 +18,7 @@ from vllm.entrypoints.constants import (
) )
from vllm.entrypoints.ssl import SSLCertRefresher from vllm.entrypoints.ssl import SSLCertRefresher
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.utils import find_process_using_port from vllm.utils.network_utils import find_process_using_port
from vllm.v1.engine.exceptions import EngineDeadError, EngineGenerateError from vllm.v1.engine.exceptions import EngineDeadError, EngineGenerateError
logger = init_logger(__name__) logger = init_logger(__name__)
......
...@@ -115,9 +115,9 @@ from vllm.utils import ( ...@@ -115,9 +115,9 @@ from vllm.utils import (
Device, Device,
FlexibleArgumentParser, FlexibleArgumentParser,
decorate_logs, decorate_logs,
is_valid_ipv6_address,
set_ulimit, set_ulimit,
) )
from vllm.utils.network_utils import is_valid_ipv6_address
from vllm.v1.engine.exceptions import EngineDeadError from vllm.v1.engine.exceptions import EngineDeadError
from vllm.v1.metrics.prometheus import get_prometheus_registry from vllm.v1.metrics.prometheus import get_prometheus_registry
from vllm.version import __version__ as VLLM_VERSION from vllm.version import __version__ as VLLM_VERSION
......
...@@ -19,12 +19,12 @@ from vllm.logger import init_logger ...@@ -19,12 +19,12 @@ from vllm.logger import init_logger
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.ray.ray_env import get_env_vars_to_copy from vllm.ray.ray_env import get_env_vars_to_copy
from vllm.sequence import ExecuteModelRequest from vllm.sequence import ExecuteModelRequest
from vllm.utils import ( from vllm.utils.asyncio import make_async
from vllm.utils.network_utils import (
get_distributed_init_method, get_distributed_init_method,
get_ip, get_ip,
get_open_port, get_open_port,
) )
from vllm.utils.asyncio import make_async
from vllm.v1.outputs import SamplerOutput from vllm.v1.outputs import SamplerOutput
if ray is not None: if ray is not None:
......
...@@ -15,7 +15,7 @@ from vllm.executor.msgspec_utils import decode_hook, encode_hook ...@@ -15,7 +15,7 @@ from vllm.executor.msgspec_utils import decode_hook, encode_hook
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.sequence import ExecuteModelRequest, IntermediateTensors from vllm.sequence import ExecuteModelRequest, IntermediateTensors
from vllm.utils import get_ip from vllm.utils.network_utils import get_ip
from vllm.v1.outputs import AsyncModelRunnerOutput from vllm.v1.outputs import AsyncModelRunnerOutput
from vllm.v1.worker.worker_base import WorkerWrapperBase from vllm.v1.worker.worker_base import WorkerWrapperBase
......
...@@ -13,7 +13,8 @@ import torch.distributed as dist ...@@ -13,7 +13,8 @@ import torch.distributed as dist
import vllm.envs as envs import vllm.envs as envs
from vllm.executor.executor_base import ExecutorBase from vllm.executor.executor_base import ExecutorBase
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.utils import get_distributed_init_method, get_ip, get_open_port, run_method from vllm.utils import run_method
from vllm.utils.network_utils import get_distributed_init_method, get_ip, get_open_port
from vllm.v1.engine import ReconfigureDistributedRequest, ReconfigureRankType from vllm.v1.engine import ReconfigureDistributedRequest, ReconfigureRankType
from vllm.v1.outputs import AsyncModelRunnerOutput from vllm.v1.outputs import AsyncModelRunnerOutput
from vllm.v1.worker.worker_base import WorkerWrapperBase from vllm.v1.worker.worker_base import WorkerWrapperBase
......
...@@ -7,12 +7,10 @@ import enum ...@@ -7,12 +7,10 @@ import enum
import getpass import getpass
import importlib import importlib
import inspect import inspect
import ipaddress
import json import json
import multiprocessing import multiprocessing
import os import os
import signal import signal
import socket
import subprocess import subprocess
import sys import sys
import tempfile import tempfile
...@@ -33,15 +31,12 @@ from argparse import ( ...@@ -33,15 +31,12 @@ from argparse import (
from collections import defaultdict from collections import defaultdict
from collections.abc import ( from collections.abc import (
Callable, Callable,
Iterator,
Sequence, Sequence,
) )
from concurrent.futures.process import ProcessPoolExecutor from concurrent.futures.process import ProcessPoolExecutor
from functools import cache, partial, wraps from functools import cache, partial, wraps
from pathlib import Path from pathlib import Path
from typing import TYPE_CHECKING, Any, TextIO, TypeVar from typing import TYPE_CHECKING, Any, TextIO, TypeVar
from urllib.parse import urlparse
from uuid import uuid4
import cloudpickle import cloudpickle
import psutil import psutil
...@@ -49,34 +44,36 @@ import regex as re ...@@ -49,34 +44,36 @@ import regex as re
import setproctitle import setproctitle
import torch import torch
import yaml import yaml
import zmq
import zmq.asyncio
import vllm.envs as envs import vllm.envs as envs
from vllm.logger import enable_trace_function_call, init_logger from vllm.logger import enable_trace_function_call, init_logger
from vllm.ray.lazy_utils import is_in_ray_actor from vllm.ray.lazy_utils import is_in_ray_actor
_DEPRECATED_PROFILING = {"cprofile", "cprofile_context"} _DEPRECATED_MAPPINGS = {
"cprofile": "profiling",
"cprofile_context": "profiling",
"get_open_port": "network_utils",
}
def __getattr__(name: str) -> Any: # noqa: D401 - short deprecation docstring def __getattr__(name: str) -> Any: # noqa: D401 - short deprecation docstring
"""Module-level getattr to handle deprecated profiling utilities.""" """Module-level getattr to handle deprecated utilities."""
if name in _DEPRECATED_PROFILING: if name in _DEPRECATED_MAPPINGS:
submodule_name = _DEPRECATED_MAPPINGS[name]
warnings.warn( warnings.warn(
f"vllm.utils.{name} is deprecated and will be removed in a future version. " f"vllm.utils.{name} is deprecated and will be removed in a future version. "
f"Use vllm.utils.profiling.{name} instead.", f"Use vllm.utils.{submodule_name}.{name} instead.",
DeprecationWarning, DeprecationWarning,
stacklevel=2, stacklevel=2,
) )
import vllm.utils.profiling as _prof module = __import__(f"vllm.utils.{submodule_name}", fromlist=[submodule_name])
return getattr(module, name)
return getattr(_prof, name)
raise AttributeError(f"module {__name__!r} has no attribute {name!r}") raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
def __dir__() -> list[str]: def __dir__() -> list[str]:
# expose deprecated names in dir() for better UX/tab-completion # expose deprecated names in dir() for better UX/tab-completion
return sorted(list(globals().keys()) + list(_DEPRECATED_PROFILING)) return sorted(list(globals().keys()) + list(_DEPRECATED_MAPPINGS.keys()))
if TYPE_CHECKING: if TYPE_CHECKING:
...@@ -149,197 +146,6 @@ def random_uuid() -> str: ...@@ -149,197 +146,6 @@ def random_uuid() -> str:
return str(uuid.uuid4().hex) return str(uuid.uuid4().hex)
def close_sockets(sockets: Sequence[zmq.Socket | zmq.asyncio.Socket]):
for sock in sockets:
if sock is not None:
sock.close(linger=0)
def get_ip() -> str:
host_ip = envs.VLLM_HOST_IP
if "HOST_IP" in os.environ and "VLLM_HOST_IP" not in os.environ:
logger.warning(
"The environment variable HOST_IP is deprecated and ignored, as"
" it is often used by Docker and other software to"
" interact with the container's network stack. Please "
"use VLLM_HOST_IP instead to set the IP address for vLLM processes"
" to communicate with each other."
)
if host_ip:
return host_ip
# IP is not set, try to get it from the network interface
# try ipv4
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
try:
s.connect(("8.8.8.8", 80)) # Doesn't need to be reachable
return s.getsockname()[0]
except Exception:
pass
# try ipv6
try:
s = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM)
# Google's public DNS server, see
# https://developers.google.com/speed/public-dns/docs/using#addresses
s.connect(("2001:4860:4860::8888", 80)) # Doesn't need to be reachable
return s.getsockname()[0]
except Exception:
pass
warnings.warn(
"Failed to get the IP address, using 0.0.0.0 by default."
"The value can be set by the environment variable"
" VLLM_HOST_IP or HOST_IP.",
stacklevel=2,
)
return "0.0.0.0"
def test_loopback_bind(address, family):
try:
s = socket.socket(family, socket.SOCK_DGRAM)
s.bind((address, 0)) # Port 0 = auto assign
s.close()
return True
except OSError:
return False
def get_loopback_ip() -> str:
loopback_ip = envs.VLLM_LOOPBACK_IP
if loopback_ip:
return loopback_ip
# VLLM_LOOPBACK_IP is not set, try to get it based on network interface
if test_loopback_bind("127.0.0.1", socket.AF_INET):
return "127.0.0.1"
elif test_loopback_bind("::1", socket.AF_INET6):
return "::1"
else:
raise RuntimeError(
"Neither 127.0.0.1 nor ::1 are bound to a local interface. "
"Set the VLLM_LOOPBACK_IP environment variable explicitly."
)
def is_valid_ipv6_address(address: str) -> bool:
try:
ipaddress.IPv6Address(address)
return True
except ValueError:
return False
def split_host_port(host_port: str) -> tuple[str, int]:
# ipv6
if host_port.startswith("["):
host, port = host_port.rsplit("]", 1)
host = host[1:]
port = port.split(":")[1]
return host, int(port)
else:
host, port = host_port.split(":")
return host, int(port)
def join_host_port(host: str, port: int) -> str:
if is_valid_ipv6_address(host):
return f"[{host}]:{port}"
else:
return f"{host}:{port}"
def get_distributed_init_method(ip: str, port: int) -> str:
return get_tcp_uri(ip, port)
def get_tcp_uri(ip: str, port: int) -> str:
if is_valid_ipv6_address(ip):
return f"tcp://[{ip}]:{port}"
else:
return f"tcp://{ip}:{port}"
def get_open_zmq_ipc_path() -> str:
base_rpc_path = envs.VLLM_RPC_BASE_PATH
return f"ipc://{base_rpc_path}/{uuid4()}"
def get_open_zmq_inproc_path() -> str:
return f"inproc://{uuid4()}"
def get_open_port() -> int:
"""
Get an open port for the vLLM process to listen on.
An edge case to handle, is when we run data parallel,
we need to avoid ports that are potentially used by
the data parallel master process.
Right now we reserve 10 ports for the data parallel master
process. Currently it uses 2 ports.
"""
if "VLLM_DP_MASTER_PORT" in os.environ:
dp_master_port = envs.VLLM_DP_MASTER_PORT
reserved_port_range = range(dp_master_port, dp_master_port + 10)
while True:
candidate_port = _get_open_port()
if candidate_port not in reserved_port_range:
return candidate_port
return _get_open_port()
def get_open_ports_list(count: int = 5) -> list[int]:
"""Get a list of open ports."""
ports = set[int]()
while len(ports) < count:
ports.add(get_open_port())
return list(ports)
def _get_open_port() -> int:
port = envs.VLLM_PORT
if port is not None:
while True:
try:
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
s.bind(("", port))
return port
except OSError:
port += 1 # Increment port number if already in use
logger.info("Port %d is already in use, trying port %d", port - 1, port)
# try ipv4
try:
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
s.bind(("", 0))
return s.getsockname()[1]
except OSError:
# try ipv6
with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
s.bind(("", 0))
return s.getsockname()[1]
def find_process_using_port(port: int) -> psutil.Process | None:
# TODO: We can not check for running processes with network
# port on macOS. Therefore, we can not have a full graceful shutdown
# of vLLM. For now, let's not look for processes in this case.
# Ref: https://www.florianreinhard.de/accessdenied-in-psutil/
if sys.platform.startswith("darwin"):
return None
our_pid = os.getpid()
for conn in psutil.net_connections():
if conn.laddr.port == port and (conn.pid is not None and conn.pid != our_pid):
try:
return psutil.Process(conn.pid)
except psutil.NoSuchProcess:
return None
return None
def update_environment_variables(envs: dict[str, str]): def update_environment_variables(envs: dict[str, str]):
for k, v in envs.items(): for k, v in envs.items():
if k in os.environ and os.environ[k] != v: if k in os.environ and os.environ[k] != v:
...@@ -1119,122 +925,6 @@ def get_exception_traceback(): ...@@ -1119,122 +925,6 @@ def get_exception_traceback():
return err_str return err_str
def split_zmq_path(path: str) -> tuple[str, str, str]:
"""Split a zmq path into its parts."""
parsed = urlparse(path)
if not parsed.scheme:
raise ValueError(f"Invalid zmq path: {path}")
scheme = parsed.scheme
host = parsed.hostname or ""
port = str(parsed.port or "")
if scheme == "tcp" and not all((host, port)):
# The host and port fields are required for tcp
raise ValueError(f"Invalid zmq path: {path}")
if scheme != "tcp" and port:
# port only makes sense with tcp
raise ValueError(f"Invalid zmq path: {path}")
return scheme, host, port
def make_zmq_path(scheme: str, host: str, port: int | None = None) -> str:
"""Make a ZMQ path from its parts.
Args:
scheme: The ZMQ transport scheme (e.g. tcp, ipc, inproc).
host: The host - can be an IPv4 address, IPv6 address, or hostname.
port: Optional port number, only used for TCP sockets.
Returns:
A properly formatted ZMQ path string.
"""
if port is None:
return f"{scheme}://{host}"
if is_valid_ipv6_address(host):
return f"{scheme}://[{host}]:{port}"
return f"{scheme}://{host}:{port}"
# Adapted from: https://github.com/sgl-project/sglang/blob/v0.4.1/python/sglang/srt/utils.py#L783 # noqa: E501
def make_zmq_socket(
ctx: zmq.asyncio.Context | zmq.Context, # type: ignore[name-defined]
path: str,
socket_type: Any,
bind: bool | None = None,
identity: bytes | None = None,
linger: int | None = None,
) -> zmq.Socket | zmq.asyncio.Socket: # type: ignore[name-defined]
"""Make a ZMQ socket with the proper bind/connect semantics."""
mem = psutil.virtual_memory()
socket = ctx.socket(socket_type)
# Calculate buffer size based on system memory
total_mem = mem.total / 1024**3
available_mem = mem.available / 1024**3
# For systems with substantial memory (>32GB total, >16GB available):
# - Set a large 0.5GB buffer to improve throughput
# For systems with less memory:
# - Use system default (-1) to avoid excessive memory consumption
buf_size = int(0.5 * 1024**3) if total_mem > 32 and available_mem > 16 else -1
if bind is None:
bind = socket_type not in (zmq.PUSH, zmq.SUB, zmq.XSUB)
if socket_type in (zmq.PULL, zmq.DEALER, zmq.ROUTER):
socket.setsockopt(zmq.RCVHWM, 0)
socket.setsockopt(zmq.RCVBUF, buf_size)
if socket_type in (zmq.PUSH, zmq.DEALER, zmq.ROUTER):
socket.setsockopt(zmq.SNDHWM, 0)
socket.setsockopt(zmq.SNDBUF, buf_size)
if identity is not None:
socket.setsockopt(zmq.IDENTITY, identity)
if linger is not None:
socket.setsockopt(zmq.LINGER, linger)
if socket_type == zmq.XPUB:
socket.setsockopt(zmq.XPUB_VERBOSE, True)
# Determine if the path is a TCP socket with an IPv6 address.
# Enable IPv6 on the zmq socket if so.
scheme, host, _ = split_zmq_path(path)
if scheme == "tcp" and is_valid_ipv6_address(host):
socket.setsockopt(zmq.IPV6, 1)
if bind:
socket.bind(path)
else:
socket.connect(path)
return socket
@contextlib.contextmanager
def zmq_socket_ctx(
path: str,
socket_type: Any,
bind: bool | None = None,
linger: int = 0,
identity: bytes | None = None,
) -> Iterator[zmq.Socket]:
"""Context manager for a ZMQ socket"""
ctx = zmq.Context() # type: ignore[attr-defined]
try:
yield make_zmq_socket(ctx, path, socket_type, bind=bind, identity=identity)
except KeyboardInterrupt:
logger.debug("Got Keyboard Interrupt.")
finally:
ctx.destroy(linger=linger)
def _maybe_force_spawn(): def _maybe_force_spawn():
"""Check if we need to force the use of the `spawn` multiprocessing start """Check if we need to force the use of the `spawn` multiprocessing start
method. method.
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import contextlib
import ipaddress
import os
import socket
import sys
import warnings
from collections.abc import (
Iterator,
Sequence,
)
from typing import Any
from urllib.parse import urlparse
from uuid import uuid4
import psutil
import zmq
import zmq.asyncio
import vllm.envs as envs
from vllm.logger import init_logger
logger = init_logger(__name__)
def close_sockets(sockets: Sequence[zmq.Socket | zmq.asyncio.Socket]):
for sock in sockets:
if sock is not None:
sock.close(linger=0)
def get_ip() -> str:
host_ip = envs.VLLM_HOST_IP
if "HOST_IP" in os.environ and "VLLM_HOST_IP" not in os.environ:
logger.warning(
"The environment variable HOST_IP is deprecated and ignored, as"
" it is often used by Docker and other software to"
" interact with the container's network stack. Please "
"use VLLM_HOST_IP instead to set the IP address for vLLM processes"
" to communicate with each other."
)
if host_ip:
return host_ip
# IP is not set, try to get it from the network interface
# try ipv4
try:
with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
s.connect(("8.8.8.8", 80)) # Doesn't need to be reachable
return s.getsockname()[0]
except Exception:
pass
# try ipv6
try:
with socket.socket(socket.AF_INET6, socket.SOCK_DGRAM) as s:
# Google's public DNS server, see
# https://developers.google.com/speed/public-dns/docs/using#addresses
s.connect(("2001:4860:4860::8888", 80)) # Doesn't need to be reachable
return s.getsockname()[0]
except Exception:
pass
warnings.warn(
"Failed to get the IP address, using 0.0.0.0 by default."
"The value can be set by the environment variable"
" VLLM_HOST_IP or HOST_IP.",
stacklevel=2,
)
return "0.0.0.0"
def test_loopback_bind(address, family):
try:
s = socket.socket(family, socket.SOCK_DGRAM)
s.bind((address, 0)) # Port 0 = auto assign
s.close()
return True
except OSError:
return False
def get_loopback_ip() -> str:
loopback_ip = envs.VLLM_LOOPBACK_IP
if loopback_ip:
return loopback_ip
# VLLM_LOOPBACK_IP is not set, try to get it based on network interface
if test_loopback_bind("127.0.0.1", socket.AF_INET):
return "127.0.0.1"
elif test_loopback_bind("::1", socket.AF_INET6):
return "::1"
else:
raise RuntimeError(
"Neither 127.0.0.1 nor ::1 are bound to a local interface. "
"Set the VLLM_LOOPBACK_IP environment variable explicitly."
)
def is_valid_ipv6_address(address: str) -> bool:
try:
ipaddress.IPv6Address(address)
return True
except ValueError:
return False
def split_host_port(host_port: str) -> tuple[str, int]:
# ipv6
if host_port.startswith("["):
host, port = host_port.rsplit("]", 1)
host = host[1:]
port = port.split(":")[1]
return host, int(port)
else:
host, port = host_port.split(":")
return host, int(port)
def join_host_port(host: str, port: int) -> str:
if is_valid_ipv6_address(host):
return f"[{host}]:{port}"
else:
return f"{host}:{port}"
def get_distributed_init_method(ip: str, port: int) -> str:
return get_tcp_uri(ip, port)
def get_tcp_uri(ip: str, port: int) -> str:
if is_valid_ipv6_address(ip):
return f"tcp://[{ip}]:{port}"
else:
return f"tcp://{ip}:{port}"
def get_open_zmq_ipc_path() -> str:
base_rpc_path = envs.VLLM_RPC_BASE_PATH
return f"ipc://{base_rpc_path}/{uuid4()}"
def get_open_zmq_inproc_path() -> str:
return f"inproc://{uuid4()}"
def get_open_port() -> int:
"""
Get an open port for the vLLM process to listen on.
An edge case to handle, is when we run data parallel,
we need to avoid ports that are potentially used by
the data parallel master process.
Right now we reserve 10 ports for the data parallel master
process. Currently it uses 2 ports.
"""
if "VLLM_DP_MASTER_PORT" in os.environ:
dp_master_port = envs.VLLM_DP_MASTER_PORT
reserved_port_range = range(dp_master_port, dp_master_port + 10)
while True:
candidate_port = _get_open_port()
if candidate_port not in reserved_port_range:
return candidate_port
return _get_open_port()
def get_open_ports_list(count: int = 5) -> list[int]:
"""Get a list of open ports."""
ports = set[int]()
while len(ports) < count:
ports.add(get_open_port())
return list(ports)
def _get_open_port() -> int:
port = envs.VLLM_PORT
if port is not None:
while True:
try:
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
s.bind(("", port))
return port
except OSError:
port += 1 # Increment port number if already in use
logger.info("Port %d is already in use, trying port %d", port - 1, port)
# try ipv4
try:
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
s.bind(("", 0))
return s.getsockname()[1]
except OSError:
# try ipv6
with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
s.bind(("", 0))
return s.getsockname()[1]
def find_process_using_port(port: int) -> psutil.Process | None:
# TODO: We can not check for running processes with network
# port on macOS. Therefore, we can not have a full graceful shutdown
# of vLLM. For now, let's not look for processes in this case.
# Ref: https://www.florianreinhard.de/accessdenied-in-psutil/
if sys.platform.startswith("darwin"):
return None
our_pid = os.getpid()
for conn in psutil.net_connections():
if conn.laddr.port == port and (conn.pid is not None and conn.pid != our_pid):
try:
return psutil.Process(conn.pid)
except psutil.NoSuchProcess:
return None
return None
def split_zmq_path(path: str) -> tuple[str, str, str]:
"""Split a zmq path into its parts."""
parsed = urlparse(path)
if not parsed.scheme:
raise ValueError(f"Invalid zmq path: {path}")
scheme = parsed.scheme
host = parsed.hostname or ""
port = str(parsed.port or "")
if scheme == "tcp" and not all((host, port)):
# The host and port fields are required for tcp
raise ValueError(f"Invalid zmq path: {path}")
if scheme != "tcp" and port:
# port only makes sense with tcp
raise ValueError(f"Invalid zmq path: {path}")
return scheme, host, port
def make_zmq_path(scheme: str, host: str, port: int | None = None) -> str:
"""Make a ZMQ path from its parts.
Args:
scheme: The ZMQ transport scheme (e.g. tcp, ipc, inproc).
host: The host - can be an IPv4 address, IPv6 address, or hostname.
port: Optional port number, only used for TCP sockets.
Returns:
A properly formatted ZMQ path string.
"""
if port is None:
return f"{scheme}://{host}"
if is_valid_ipv6_address(host):
return f"{scheme}://[{host}]:{port}"
return f"{scheme}://{host}:{port}"
# Adapted from: https://github.com/sgl-project/sglang/blob/v0.4.1/python/sglang/srt/utils.py#L783 # noqa: E501
def make_zmq_socket(
ctx: zmq.asyncio.Context | zmq.Context, # type: ignore[name-defined]
path: str,
socket_type: Any,
bind: bool | None = None,
identity: bytes | None = None,
linger: int | None = None,
) -> zmq.Socket | zmq.asyncio.Socket: # type: ignore[name-defined]
"""Make a ZMQ socket with the proper bind/connect semantics."""
mem = psutil.virtual_memory()
socket = ctx.socket(socket_type)
# Calculate buffer size based on system memory
total_mem = mem.total / 1024**3
available_mem = mem.available / 1024**3
# For systems with substantial memory (>32GB total, >16GB available):
# - Set a large 0.5GB buffer to improve throughput
# For systems with less memory:
# - Use system default (-1) to avoid excessive memory consumption
buf_size = int(0.5 * 1024**3) if total_mem > 32 and available_mem > 16 else -1
if bind is None:
bind = socket_type not in (zmq.PUSH, zmq.SUB, zmq.XSUB)
if socket_type in (zmq.PULL, zmq.DEALER, zmq.ROUTER):
socket.setsockopt(zmq.RCVHWM, 0)
socket.setsockopt(zmq.RCVBUF, buf_size)
if socket_type in (zmq.PUSH, zmq.DEALER, zmq.ROUTER):
socket.setsockopt(zmq.SNDHWM, 0)
socket.setsockopt(zmq.SNDBUF, buf_size)
if identity is not None:
socket.setsockopt(zmq.IDENTITY, identity)
if linger is not None:
socket.setsockopt(zmq.LINGER, linger)
if socket_type == zmq.XPUB:
socket.setsockopt(zmq.XPUB_VERBOSE, True)
# Determine if the path is a TCP socket with an IPv6 address.
# Enable IPv6 on the zmq socket if so.
scheme, host, _ = split_zmq_path(path)
if scheme == "tcp" and is_valid_ipv6_address(host):
socket.setsockopt(zmq.IPV6, 1)
if bind:
socket.bind(path)
else:
socket.connect(path)
return socket
@contextlib.contextmanager
def zmq_socket_ctx(
path: str,
socket_type: Any,
bind: bool | None = None,
linger: int = 0,
identity: bytes | None = None,
) -> Iterator[zmq.Socket]:
"""Context manager for a ZMQ socket"""
ctx = zmq.Context() # type: ignore[attr-defined]
try:
yield make_zmq_socket(ctx, path, socket_type, bind=bind, identity=identity)
except KeyboardInterrupt:
logger.debug("Got Keyboard Interrupt.")
finally:
ctx.destroy(linger=linger)
...@@ -10,7 +10,8 @@ import zmq ...@@ -10,7 +10,8 @@ import zmq
from vllm.config import ParallelConfig from vllm.config import ParallelConfig
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.utils import get_mp_context, make_zmq_socket, set_process_title from vllm.utils import get_mp_context, set_process_title
from vllm.utils.network_utils import make_zmq_socket
from vllm.v1.engine import EngineCoreOutputs, EngineCoreRequestType from vllm.v1.engine import EngineCoreOutputs, EngineCoreRequestType
from vllm.v1.serial_utils import MsgpackDecoder from vllm.v1.serial_utils import MsgpackDecoder
from vllm.v1.utils import get_engine_client_zmq_addr, shutdown from vllm.v1.utils import get_engine_client_zmq_addr, shutdown
......
...@@ -30,12 +30,12 @@ from vllm.tasks import POOLING_TASKS, SupportedTask ...@@ -30,12 +30,12 @@ from vllm.tasks import POOLING_TASKS, SupportedTask
from vllm.transformers_utils.config import maybe_register_config_serialize_by_value from vllm.transformers_utils.config import maybe_register_config_serialize_by_value
from vllm.utils import ( from vllm.utils import (
decorate_logs, decorate_logs,
make_zmq_socket,
set_process_title, set_process_title,
) )
from vllm.utils.gc_utils import maybe_attach_gc_debug_callback from vllm.utils.gc_utils import maybe_attach_gc_debug_callback
from vllm.utils.hashing import get_hash_fn_by_name from vllm.utils.hashing import get_hash_fn_by_name
from vllm.utils.import_utils import resolve_obj_by_qualname from vllm.utils.import_utils import resolve_obj_by_qualname
from vllm.utils.network_utils import make_zmq_socket
from vllm.v1.core.kv_cache_utils import ( from vllm.v1.core.kv_cache_utils import (
BlockHash, BlockHash,
generate_scheduler_kv_cache_config, generate_scheduler_kv_cache_config,
......
...@@ -23,13 +23,13 @@ from vllm.config import VllmConfig ...@@ -23,13 +23,13 @@ from vllm.config import VllmConfig
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from vllm.tasks import SupportedTask from vllm.tasks import SupportedTask
from vllm.utils import ( from vllm.utils.asyncio import in_loop
from vllm.utils.network_utils import (
close_sockets, close_sockets,
get_open_port, get_open_port,
get_open_zmq_inproc_path, get_open_zmq_inproc_path,
make_zmq_socket, make_zmq_socket,
) )
from vllm.utils.asyncio import in_loop
from vllm.v1.engine import ( from vllm.v1.engine import (
EngineCoreOutputs, EngineCoreOutputs,
EngineCoreRequest, EngineCoreRequest,
......
...@@ -20,7 +20,8 @@ from vllm.config import CacheConfig, ParallelConfig, VllmConfig ...@@ -20,7 +20,8 @@ from vllm.config import CacheConfig, ParallelConfig, VllmConfig
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.ray.ray_env import get_env_vars_to_copy from vllm.ray.ray_env import get_env_vars_to_copy
from vllm.utils import get_mp_context, get_open_zmq_ipc_path, zmq_socket_ctx from vllm.utils import get_mp_context
from vllm.utils.network_utils import get_open_zmq_ipc_path, zmq_socket_ctx
from vllm.v1.engine.coordinator import DPCoordinator from vllm.v1.engine.coordinator import DPCoordinator
from vllm.v1.executor.abstract import Executor from vllm.v1.executor.abstract import Executor
from vllm.v1.utils import get_engine_client_zmq_addr, shutdown from vllm.v1.utils import get_engine_client_zmq_addr, shutdown
......
...@@ -38,11 +38,13 @@ from vllm.logger import init_logger ...@@ -38,11 +38,13 @@ from vllm.logger import init_logger
from vllm.utils import ( from vllm.utils import (
_maybe_force_spawn, _maybe_force_spawn,
decorate_logs, decorate_logs,
get_mp_context,
set_process_title,
)
from vllm.utils.network_utils import (
get_distributed_init_method, get_distributed_init_method,
get_loopback_ip, get_loopback_ip,
get_mp_context,
get_open_port, get_open_port,
set_process_title,
) )
from vllm.v1.core.sched.output import SchedulerOutput from vllm.v1.core.sched.output import SchedulerOutput
from vllm.v1.executor.abstract import Executor, FailureCallback from vllm.v1.executor.abstract import Executor, FailureCallback
......
...@@ -25,12 +25,8 @@ from torch.autograd.profiler import record_function ...@@ -25,12 +25,8 @@ from torch.autograd.profiler import record_function
import vllm.envs as envs import vllm.envs as envs
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.usage.usage_lib import UsageContext, is_usage_stats_enabled, usage_message from vllm.usage.usage_lib import UsageContext, is_usage_stats_enabled, usage_message
from vllm.utils import ( from vllm.utils import kill_process_tree
get_open_port, from vllm.utils.network_utils import get_open_port, get_open_zmq_ipc_path, get_tcp_uri
get_open_zmq_ipc_path,
get_tcp_uri,
kill_process_tree,
)
if TYPE_CHECKING: if TYPE_CHECKING:
import numpy as np import numpy as np
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment