import enum
import os
import socket
import uuid
from platform import uname
from typing import List

import psutil
import torch

class Device(enum.Enum):
    """Kind of device a resource lives on."""
    # Explicit values mirror exactly what enum.auto() assigned (1 and 2).
    GPU = 1
    CPU = 2


class Counter:
    """Monotonically increasing integer counter.

    Implements the iterator protocol, so values can be drawn either with
    ``next(counter)`` or by iterating it directly (it is an infinite
    iterator, so cap it with e.g. ``zip`` or ``itertools.islice``).
    """

    def __init__(self, start: int = 0) -> None:
        # First value that __next__ will return.
        self.counter = start

    def __iter__(self) -> "Counter":
        # Added so the counter works with for-loops/zip; __next__ alone
        # did not make it a valid iterable.
        return self

    def __next__(self) -> int:
        """Return the current value and advance the counter by one."""
        i = self.counter
        self.counter += 1
        return i

    def reset(self) -> None:
        # NOTE(review): resets to 0, not to the original `start` value —
        # preserved as-is since callers may rely on it.
        self.counter = 0


def is_hip() -> bool:
    """Return True when this torch build targets AMD ROCm (HIP)."""
    hip_version = torch.version.hip
    return hip_version is not None


def get_max_shared_memory_bytes(gpu: int = 0) -> int:
    """Returns the maximum shared memory per thread block in bytes."""
    # NOTE: This import statement should be executed lazily since
    # the Neuron-X backend does not have the `cuda_utils` module.
    from vllm._C import cuda_utils

    # Attribute id differs between the CUDA and HIP runtimes.
    # https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html
    attr = 74 if is_hip() else 97  # cudaDevAttrMaxSharedMemoryPerBlockOptin
    max_shared_mem = cuda_utils.get_device_attribute(attr, gpu)
    return int(max_shared_mem)


def get_cpu_memory() -> int:
    """Returns the total CPU memory of the node in bytes."""
    mem_stats = psutil.virtual_memory()
    return mem_stats.total


def random_uuid() -> str:
    """Return a random UUID4 as a 32-character lowercase hex string."""
    # uuid4().hex is already a str; the previous str() wrapper was redundant.
    return uuid.uuid4().hex


def in_wsl() -> bool:
    """Detect Windows Subsystem for Linux via the kernel identification.

    Reference: https://github.com/microsoft/WSL/issues/4071
    """
    platform_string = " ".join(uname()).lower()
    return "microsoft" in platform_string


def get_ip() -> str:
    """Return this host's outbound IPv4 address.

    connect() on a UDP socket sends no packets; it only selects the
    local interface that would route to the given address.
    """
    # Fix: the original leaked the socket (never closed). The context
    # manager guarantees closure even if connect() raises.
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
        s.connect(("8.8.8.8", 80))  # Doesn't need to be reachable
        return s.getsockname()[0]


def get_distributed_init_method(ip: str, port: int) -> str:
    """Format an ip/port pair as a tcp:// init-method URL."""
    return "tcp://{}:{}".format(ip, port)


def get_open_port() -> int:
    """Ask the OS for a currently-free TCP port number.

    NOTE: the socket is closed before returning, so another process may
    grab the port in the meantime (inherent bind-to-0 race).
    """
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sock.bind(("", 0))  # Port 0 => kernel picks an ephemeral free port.
        return sock.getsockname()[1]
    finally:
        sock.close()


def set_cuda_visible_devices(device_ids: List[int]) -> None:
    """Expose only the given GPU ids via the CUDA_VISIBLE_DEVICES env var."""
    joined = ",".join(str(dev_id) for dev_id in device_ids)
    os.environ["CUDA_VISIBLE_DEVICES"] = joined