import_utils.py 2.07 KB
Newer Older
fxmarty's avatar
fxmarty committed
1
import torch
2
from loguru import logger
3
import subprocess
fxmarty's avatar
fxmarty committed
4

Nicolas Patry's avatar
Nicolas Patry committed
5

Wang, Yi's avatar
Wang, Yi committed
6
def is_ipex_available():
    """Report whether Intel Extension for PyTorch can be imported.

    Returns:
        bool: ``True`` when ``import intel_extension_for_pytorch`` succeeds,
        ``False`` when it raises ``ImportError``.
    """
    try:
        # Imported purely as an availability probe; the bound name is unused.
        import intel_extension_for_pytorch  # noqa: F401
    except ImportError:
        return False
    return True
12

Nicolas Patry's avatar
Nicolas Patry committed
13

Nicolas Patry's avatar
Nicolas Patry committed
14
15
16
17
18
19
20
def get_cuda_free_memory(device, memory_fraction):
    """Return the free memory on ``device`` that fits in the allowed fraction.

    Args:
        device: Device identifier forwarded to ``torch.cuda.mem_get_info`` and
            ``torch.cuda.get_device_properties``.
        memory_fraction: Share of the device's total memory the caller may
            use; the remaining ``1 - memory_fraction`` is treated as reserved.

    Returns:
        Currently free memory minus the reserved share, never below ``0``.
    """
    currently_free = torch.cuda.mem_get_info(device)[0]
    device_total = torch.cuda.get_device_properties(device).total_memory
    # Part of the device that must stay untouched regardless of current usage.
    reserved = (1 - memory_fraction) * device_total
    return max(0, currently_free - reserved)


Wang, Yi's avatar
Wang, Yi committed
21
def get_xpu_free_memory(device, memory_fraction):
    """Estimate the free memory of an XPU device by querying ``xpu-smi``.

    Args:
        device: Object exposing ``.index``; it is also passed to
            ``torch.xpu.get_device_properties``.
        memory_fraction: Accepted for signature parity with the other
            ``get_*_free_memory`` helpers. It is not used here: a fixed 95%
            of the device's total memory is treated as usable.

    Returns:
        int: ``total_memory * 0.95`` minus the used memory reported by
        ``xpu-smi``, clamped so it is never negative.

    Raises:
        subprocess.CalledProcessError: If ``xpu-smi`` exits with a non-zero
            status.
        FileNotFoundError: If the ``xpu-smi`` executable cannot be found.
        IndexError, ValueError: If the tool's output does not have the
            expected "header line, then comma-separated data line" layout.
    """
    total_memory = torch.xpu.get_device_properties(device).total_memory
    device_id = device.index

    # One sample ("-n 1") of metric 18 for this device.
    # NOTE(review): metric 18 is presumably "GPU Memory Used (MiB)", given the
    # MiB -> bytes scaling applied below; confirm against the xpu-smi docs.
    command = ["xpu-smi", "dump", "-d", str(device_id), "-m", "18", "-n", "1"]
    lines = subprocess.check_output(command).decode("utf-8").split("\n")

    # lines[0] is the header row; the value is the last column of the first
    # data row.
    used_memory = float(lines[1].split(",")[-1]) * 1024 * 1024

    # Clamp at zero so an over-committed device never reports a negative
    # budget, matching what get_cuda_free_memory does.
    return max(0, int(total_memory * 0.95 - used_memory))


Wang, Yi's avatar
Wang, Yi committed
31
32
33
34
35
36
37
38
39
def get_cpu_free_memory(device, memory_fraction):
    """Return this process's share of the host's available memory, in bytes.

    Args:
        device: Unused; present so the signature matches the other
            ``get_*_free_memory`` helpers.
        memory_fraction: Unused; a fixed 95% of available memory is taken.

    Returns:
        int: 95% of ``psutil.virtual_memory().available`` divided by
        ``WORLD_SIZE``.
    """
    # Imported lazily, inside the function, exactly as the original did.
    import psutil
    from text_generation_server.utils.dist import WORLD_SIZE

    available_bytes = psutil.virtual_memory().available
    # NOTE(review): WORLD_SIZE is presumably the number of cooperating
    # processes sharing this host - confirm in utils.dist.
    per_process_share = available_bytes * 0.95 / WORLD_SIZE
    return int(per_process_share)


Nicolas Patry's avatar
Nicolas Patry committed
40
41
42
43
def noop(*args, **kwargs):
    """Accept any arguments, do nothing, and return ``None``."""
    return None


Nicolas Patry's avatar
Nicolas Patry committed
44
45
46
47
48
49
50
51
52
53
54
# Backend detection, evaluated once when this module is imported.
#
# Sets four module-level names for the rest of the package:
#   SYSTEM          - one of "rocm", "cuda", "ipex" or "cpu"
#   empty_cache     - callable that releases the backend's cached memory
#   synchronize     - callable that waits for the backend's pending work
#   get_free_memory - callable(device, memory_fraction) -> free memory
#
# Branch order matters: the first matching backend wins.
SYSTEM = None
if torch.version.hip is not None:
    # This branch uses the torch.cuda API for ROCm as well.
    SYSTEM = "rocm"
    empty_cache = torch.cuda.empty_cache
    synchronize = torch.cuda.synchronize
    get_free_memory = get_cuda_free_memory
elif torch.version.cuda is not None and torch.cuda.is_available():
    SYSTEM = "cuda"
    empty_cache = torch.cuda.empty_cache
    synchronize = torch.cuda.synchronize
    get_free_memory = get_cuda_free_memory
elif is_ipex_available():
    SYSTEM = "ipex"
    if hasattr(torch, "xpu") and torch.xpu.is_available():
        empty_cache = torch.xpu.empty_cache
        synchronize = torch.xpu.synchronize
        get_free_memory = get_xpu_free_memory
    else:
        # IPEX is importable but no XPU is usable: fall back to the CPU
        # helpers while still reporting SYSTEM as "ipex".
        empty_cache = noop
        synchronize = noop
        get_free_memory = get_cpu_free_memory
else:
    SYSTEM = "cpu"

    empty_cache = noop
    synchronize = noop
    get_free_memory = get_cpu_free_memory
logger.info(f"Detected system {SYSTEM}")