utils.py 1.22 KB
Newer Older
Woosuk Kwon's avatar
Woosuk Kwon committed
1
import enum
Zhuohan Li's avatar
Zhuohan Li committed
2
import uuid
3
from platform import uname
Zhuohan Li's avatar
Zhuohan Li committed
4

5
import psutil
Zhuohan Li's avatar
Zhuohan Li committed
6
7
import torch

8
from vllm._C import cuda_utils
9

Woosuk Kwon's avatar
Woosuk Kwon committed
10
11
12
13
14
15
16
17
18
19
20

class Device(enum.Enum):
    """Placement target for tensors/blocks: GPU or CPU memory."""
    # Explicit values match what enum.auto() would assign (1, 2).
    GPU = 1
    CPU = 2


class Counter:
    """Monotonically increasing integer counter.

    Each call to ``next(counter)`` hands out the current value and
    advances by one. ``reset()`` returns the counter to zero (not to
    the original ``start`` value).
    """

    def __init__(self, start: int = 0) -> None:
        # The next value that __next__ will return.
        self.counter = start

    def __next__(self) -> int:
        value = self.counter
        self.counter = value + 1
        return value

    def reset(self) -> None:
        # Note: resets to 0 regardless of the constructor's start value.
        self.counter = 0
Zhuohan Li's avatar
Zhuohan Li committed
28

29

30
31
32
33
def is_hip() -> bool:
    """Return True when this PyTorch build targets AMD ROCm (HIP)."""
    hip_version = torch.version.hip
    return hip_version is not None


34
35
36
def get_max_shared_memory_bytes(gpu: int = 0) -> int:
    """Returns the maximum shared memory per thread block in bytes."""
    # The attribute id for MaxSharedMemoryPerBlockOptin differs between
    # the ROCm (74) and CUDA (97) runtimes.
    # https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html
    attribute_id = 74 if is_hip() else 97
    return int(cuda_utils.get_device_attribute(attribute_id, gpu))


43
def get_cpu_memory() -> int:
    """Returns the total CPU memory of the node in bytes."""
    memory_info = psutil.virtual_memory()
    return memory_info.total
Zhuohan Li's avatar
Zhuohan Li committed
46
47
48
49


def random_uuid() -> str:
    """Return a random UUID4 as a 32-character lowercase hex string.

    ``UUID.hex`` is already a ``str``, so the original ``str(...)``
    wrapper was redundant and has been removed.
    """
    return uuid.uuid4().hex
50

51

52
53
54
def in_wsl() -> bool:
    """Return True when running under Windows Subsystem for Linux.

    WSL kernels embed "microsoft" in the uname fields.
    Reference: https://github.com/microsoft/WSL/issues/4071
    """
    platform_string = " ".join(uname())
    return "microsoft" in platform_string.lower()