"vscode:/vscode.git/clone" did not exist on "c12f4734807b57f3d5a27fac91296fe9efdced58"
import_utils.py 2.05 KB
Newer Older
fxmarty's avatar
fxmarty committed
1
import torch
2
from loguru import logger
3
import subprocess
4
import os
fxmarty's avatar
fxmarty committed
5

Nicolas Patry's avatar
Nicolas Patry committed
6

Wang, Yi's avatar
Wang, Yi committed
7
def is_ipex_available():
    """Report whether Intel Extension for PyTorch can be imported.

    The check is done by actually importing the package, so any import-time
    side effects of ``intel_extension_for_pytorch`` take place as part of
    this call.

    Returns:
        bool: ``True`` when the import succeeds, ``False`` when it raises
        ``ImportError``.
    """
    try:
        # Imported only to probe availability; the name itself is not used.
        import intel_extension_for_pytorch  # noqa: F401
    except ImportError:
        return False
    return True
13

Nicolas Patry's avatar
Nicolas Patry committed
14

Nicolas Patry's avatar
Nicolas Patry committed
15
16
17
18
19
20
21
def get_cuda_free_memory(device, memory_fraction):
    """Compute how much memory on a CUDA/ROCm device may still be used.

    The budget is the memory currently reported free by
    ``torch.cuda.mem_get_info`` minus the share of the device's total memory
    that must stay untouched, i.e. ``(1 - memory_fraction) * total``.

    Args:
        device: Device handle accepted by ``torch.cuda.mem_get_info`` and
            ``torch.cuda.get_device_properties``.
        memory_fraction: Fraction of the total device memory the caller is
            allowed to occupy.

    Returns:
        The remaining budget, never below 0. The value is whatever the
        arithmetic yields (a float when ``memory_fraction`` is a float), or
        the int ``0`` when the budget is exhausted.
    """
    currently_free, _ = torch.cuda.mem_get_info(device)
    capacity = torch.cuda.get_device_properties(device).total_memory
    # Share of the card that is off limits for this process.
    off_limits = (1 - memory_fraction) * capacity
    return max(0, currently_free - off_limits)


Wang, Yi's avatar
Wang, Yi committed
22
def get_xpu_free_memory(device, memory_fraction):
    """Compute how much memory on an XPU device may still be used.

    The budget is ``total_memory * 0.9 * fraction`` minus the memory already
    reserved on the device (``torch.xpu.memory_reserved``), truncated to an
    int and clamped at 0.

    NOTE: the ``memory_fraction`` argument is NOT used. The fraction is always
    read from the ``XPU_MEMORY_FRACTION`` environment variable and defaults to
    ``1.0`` when the variable is unset. The parameter is kept so the function
    has the same signature as the other ``get_*_free_memory`` helpers.

    Args:
        device: Object with an ``index`` attribute that is also accepted by
            ``torch.xpu.get_device_properties`` (presumably a
            ``torch.device`` -- confirm against callers).
        memory_fraction: Ignored, see the note above.

    Returns:
        int: Remaining memory budget, never negative.

    Raises:
        ValueError: If ``XPU_MEMORY_FRACTION`` is set to something ``float``
            cannot parse.
    """
    total_memory = torch.xpu.get_device_properties(device).total_memory
    device_id = device.index
    # Use a distinct local name so it is obvious that the environment variable,
    # not the caller-supplied argument, decides the fraction.
    env_fraction = float(os.getenv("XPU_MEMORY_FRACTION", "1.0"))
    reserved = torch.xpu.memory_reserved(device_id)
    # Only 90% of the device memory is considered usable before the fraction
    # is applied.
    budget = int(total_memory * 0.9 * env_fraction - reserved)
    return max(0, budget)


Wang, Yi's avatar
Wang, Yi committed
35
36
37
38
39
40
41
42
43
def get_cpu_free_memory(device, memory_fraction):
    """Compute the share of host RAM this process may use.

    Takes 95% of the memory ``psutil`` reports as available and divides it by
    ``WORLD_SIZE`` (presumably the number of cooperating shard processes --
    confirm in ``text_generation_server.utils.dist``).

    Args:
        device: Unused; present for signature parity with the other
            ``get_*_free_memory`` helpers.
        memory_fraction: Unused, same reason.

    Returns:
        int: The per-process memory budget, truncated to an integer.
    """
    # Imported lazily, inside the function, exactly as the original did.
    import psutil
    from text_generation_server.utils.dist import WORLD_SIZE

    available = psutil.virtual_memory().available
    return int(available * 0.95 / WORLD_SIZE)


Nicolas Patry's avatar
Nicolas Patry committed
44
45
46
47
def noop(*args, **kwargs):
    """Accept any arguments, do nothing, and return ``None``."""
    return None


Nicolas Patry's avatar
Nicolas Patry committed
48
49
50
51
52
53
54
55
56
57
58
# Detect the backend once at import time and bind the backend-specific helpers
# to the module-level names `empty_cache`, `synchronize` and `get_free_memory`.
# The branch order matters: ROCm is checked first, then CUDA, then IPEX, and
# plain CPU is the fallback.
if torch.version.hip is not None:
    # ROCm is driven through the torch.cuda entry points here.
    SYSTEM = "rocm"
    empty_cache = torch.cuda.empty_cache
    synchronize = torch.cuda.synchronize
    get_free_memory = get_cuda_free_memory
elif torch.version.cuda is not None and torch.cuda.is_available():
    SYSTEM = "cuda"
    empty_cache = torch.cuda.empty_cache
    synchronize = torch.cuda.synchronize
    get_free_memory = get_cuda_free_memory
elif is_ipex_available():
    SYSTEM = "ipex"
    if hasattr(torch, "xpu") and torch.xpu.is_available():
        empty_cache = torch.xpu.empty_cache
        synchronize = torch.xpu.synchronize
        get_free_memory = get_xpu_free_memory
    else:
        # IPEX is installed but no XPU device is usable: fall back to the
        # CPU helpers while still reporting the system as "ipex".
        empty_cache = noop
        synchronize = noop
        get_free_memory = get_cpu_free_memory
else:
    SYSTEM = "cpu"

    empty_cache = noop
    synchronize = noop
    get_free_memory = get_cpu_free_memory
logger.info(f"Detected system {SYSTEM}")