import enum
import uuid
from platform import uname

import psutil
import torch

from vllm._C import cuda_utils


class Device(enum.Enum):
    """Enumeration with two members, ``GPU`` and ``CPU``."""

    # Explicit values; these are the same integers ``enum.auto()`` assigns
    # to the first and second member of a plain ``Enum``.
    GPU = 1
    CPU = 2


class Counter:
    """Integer counter that hands out consecutive values.

    Each ``next(counter)`` call returns the current value and then advances
    the counter by one. The counter never raises ``StopIteration``, so
    iterating over it directly does not terminate on its own.
    """

    def __init__(self, start: int = 0) -> None:
        # Value that the next ``next()`` call will return.
        self.counter = start

    def __iter__(self) -> "Counter":
        """Return ``self`` so the object satisfies the iterator protocol."""
        return self

    def __next__(self) -> int:
        """Return the current value, then advance the counter by one."""
        current = self.counter
        self.counter += 1
        return current

    def reset(self) -> None:
        """Rewind the counter to 0 (not to the ``start`` passed at creation)."""
        self.counter = 0


def get_max_shared_memory_bytes(gpu: int = 0) -> int:
    """Returns the maximum shared memory per thread block in bytes.

    Args:
        gpu: Device identifier forwarded to
            ``cuda_utils.get_device_attribute`` (defaults to 0).

    Returns:
        The queried attribute value converted to ``int``.
    """
    # Numeric value of the CUDA runtime device attribute of the same name.
    # https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html
    cudaDevAttrMaxSharedMemoryPerBlockOptin = 97
    max_shared_mem = cuda_utils.get_device_attribute(
        cudaDevAttrMaxSharedMemoryPerBlockOptin, gpu)
    return int(max_shared_mem)


def get_gpu_memory(gpu: int = 0) -> int:
    """Returns the total memory of the GPU in bytes.

    Args:
        gpu: Device passed to ``torch.cuda.get_device_properties``
            (defaults to 0).
    """
    device_properties = torch.cuda.get_device_properties(gpu)
    return device_properties.total_memory


def get_cpu_memory() -> int:
    """Returns the total CPU memory of the node in bytes."""
    # ``total`` is read from the result of ``psutil.virtual_memory()``.
    memory_stats = psutil.virtual_memory()
    return memory_stats.total


def random_uuid() -> str:
    """Returns the hex string of a freshly generated ``uuid.uuid4()``."""
    # ``UUID.hex`` is already a ``str``, so it is returned directly.
    return uuid.uuid4().hex


def in_wsl() -> bool:
    """Returns True if "microsoft" appears in the ``uname()`` fields.

    The fields are joined with spaces and compared case-insensitively.
    """
    # Reference: https://github.com/microsoft/WSL/issues/4071
    system_info = " ".join(uname()).lower()
    return "microsoft" in system_info