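# import_utils.py
"""Backend detection utilities.

Detects which accelerator stack is available (ROCm, CUDA, Intel IPEX on XPU or
CPU, or plain CPU) and exposes matching ``empty_cache``, ``synchronize`` and
``get_free_memory`` helpers alongside the detected ``SYSTEM`` string.
"""
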
import torch
from loguru import logger
import subprocess
import os


def is_ipex_available():
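    """Return True if ``intel_extension_for_pytorch`` can be imported."""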
    try:
        import intel_extension_for_pytorch
    except ImportError:
        return False
    return True


def get_cuda_free_memory(device, memory_fraction):
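    """Return the CUDA/ROCm memory, in bytes, this process may still allocate.

    ``memory_fraction`` is the fraction of total device memory the process is
    allowed to use; the remaining share is subtracted from the free memory
    reported by the driver.
    """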
    total_free_memory, _ = torch.cuda.mem_get_info(device)
    total_gpu_memory = torch.cuda.get_device_properties(device).total_memory
    free_memory = max(0, total_free_memory - (1 - memory_fraction) * total_gpu_memory)
    return free_memory


def get_xpu_free_memory(device, memory_fraction):
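    """Return the XPU memory, in bytes, considered available for this process.

    The ``memory_fraction`` argument is overridden by the ``XPU_MEMORY_FRACTION``
    environment variable (default ``1.0``), and a 10% headroom is kept on top of
    the memory already reserved on the device.
    """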
    total_memory = torch.xpu.get_device_properties(device).total_memory
    device_id = device.index
    memory_fraction = float(os.getenv("XPU_MEMORY_FRACTION", "1.0"))
    free_memory = max(
        0,
        int(
            total_memory * 0.9 * memory_fraction - torch.xpu.memory_reserved(device_id)
        ),
    )
    return free_memory


def get_cpu_free_memory(device, memory_fraction):
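    """Return available host RAM, in bytes, split evenly across WORLD_SIZE ranks.

    ``device`` and ``memory_fraction`` are ignored; a fixed 5% safety margin is
    kept instead.
    """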
    import psutil
    from text_generation_server.utils.dist import WORLD_SIZE

    mem = psutil.virtual_memory()
    free_memory = int(mem.available * 0.95 / WORLD_SIZE)
    return free_memory


def noop(*args, **kwargs):
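    """Do nothing; used in place of cache/synchronize hooks on backends without them."""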
    pass


SYSTEM = None
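# Pick the backend at import time: ROCm and CUDA both go through the torch.cuda
# API, IPEX covers Intel XPU (with a CPU fallback), and anything else runs on
# plain CPU with no-op cache/synchronize hooks.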
if torch.version.hip is not None:
    SYSTEM = "rocm"
    empty_cache = torch.cuda.empty_cache
    synchronize = torch.cuda.synchronize
    get_free_memory = get_cuda_free_memory
elif torch.version.cuda is not None and torch.cuda.is_available():
    SYSTEM = "cuda"
    empty_cache = torch.cuda.empty_cache
    synchronize = torch.cuda.synchronize
    get_free_memory = get_cuda_free_memory
elif is_ipex_available():
    SYSTEM = "ipex"
    if hasattr(torch, "xpu") and torch.xpu.is_available():
        empty_cache = torch.xpu.empty_cache
        synchronize = torch.xpu.synchronize
        get_free_memory = get_xpu_free_memory
    else:
        empty_cache = noop
        synchronize = noop
        get_free_memory = get_cpu_free_memory
else:
    SYSTEM = "cpu"
    empty_cache = noop
    synchronize = noop
    get_free_memory = get_cpu_free_memory
logger.info(f"Detected system {SYSTEM}")
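

# Example usage elsewhere in the server (a minimal sketch; the exact import path
# and call sites are assumed rather than taken from this file):
#
#     from text_generation_server.utils.import_utils import (
#         SYSTEM,
#         empty_cache,
#         get_free_memory,
#     )
#
#     if SYSTEM in ("cuda", "rocm"):
#         free_bytes = get_free_memory(torch.device("cuda:0"), memory_fraction=0.9)
#         empty_cache()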