import_utils.py 1.98 KB
Newer Older
fxmarty's avatar
fxmarty committed
1
import torch
2
from loguru import logger
3
import subprocess
fxmarty's avatar
fxmarty committed
4

Nicolas Patry's avatar
Nicolas Patry committed
5

Wang, Yi's avatar
Wang, Yi committed
6
def is_ipex_available():
    """Report whether Intel Extension for PyTorch can be imported.

    The import is actually performed (not merely looked up), so on success
    the package is loaded into the current process as a side effect.

    Returns:
        bool: ``True`` when ``intel_extension_for_pytorch`` imports without
        raising ``ImportError``, ``False`` otherwise.
    """
    try:
        import intel_extension_for_pytorch  # noqa: F401  (imported only as a probe)
    except ImportError:
        available = False
    else:
        available = True
    return available
12

Nicolas Patry's avatar
Nicolas Patry committed
13

Nicolas Patry's avatar
Nicolas Patry committed
14
15
16
17
18
19
20
def get_cuda_free_memory(device, memory_fraction):
    """Return the free memory on ``device`` that fits a memory-fraction budget.

    The share of total device memory that lies outside ``memory_fraction``
    is subtracted from the currently free memory, and the result is clamped
    so it is never negative.

    Args:
        device: Device handle passed to ``torch.cuda.mem_get_info`` and
            ``torch.cuda.get_device_properties``.
        memory_fraction: Fraction of total device memory the caller may use.

    Returns:
        ``max(0, free - (1 - memory_fraction) * total)``.
    """
    # mem_get_info yields a pair; only its first element (free memory) is used.
    currently_free, _ = torch.cuda.mem_get_info(device)
    capacity = torch.cuda.get_device_properties(device).total_memory

    # Portion of the device that must stay untouched under the given fraction.
    off_limits = (1 - memory_fraction) * capacity
    usable = currently_free - off_limits
    return max(0, usable)


Wang, Yi's avatar
Wang, Yi committed
21
def get_xpu_free_memory(device, memory_fraction):
    """Estimate the free memory on an Intel XPU device.

    Used memory is obtained by running the ``xpu-smi`` command-line tool and
    parsing its output. The estimate is ``0.95 * total_memory - used_memory``,
    clamped at zero so a heavily loaded device never produces a negative
    budget (the same clamping ``get_cuda_free_memory`` applies).

    Args:
        device: Device object exposing ``.index``; it is also passed to
            ``torch.xpu.get_device_properties``.
        memory_fraction: Accepted so the signature matches the other
            ``get_*_free_memory`` helpers; it is not used in the computation.

    Returns:
        int: Non-negative free-memory estimate.

    Raises:
        subprocess.CalledProcessError: If ``xpu-smi`` exits with a non-zero
            status.
        OSError: If the ``xpu-smi`` executable cannot be launched.
        IndexError, ValueError: If the tool's output does not have the
            expected shape.
    """
    total_memory = torch.xpu.get_device_properties(device).total_memory
    device_id = device.index

    # Pass an argv list directly rather than formatting a string and splitting it.
    # NOTE(review): metric id 18 is presumably "GPU memory used" reported in
    # MiB (hence the 1024 * 1024 scaling below) — confirm against xpu-smi docs.
    command = ["xpu-smi", "dump", "-d", str(device_id), "-m", "18", "-n", "1"]
    output = subprocess.check_output(command).decode("utf-8").split("\n")

    # Assumes the first output line is a header and the second line is the
    # single requested sample, whose last comma-separated field is the value
    # — TODO confirm against the installed xpu-smi version.
    used_memory = float(output[1].split(",")[-1]) * 1024 * 1024

    # Keep 5% of the device as headroom and never report a negative amount.
    free_memory = max(0, int(total_memory * 0.95 - used_memory))
    return free_memory


Wang, Yi's avatar
Wang, Yi committed
31
32
33
34
35
36
37
38
39
40
def get_cpu_free_memory(device, memory_fraction):
    """Return this process's share of the available host memory.

    Takes 95% of the memory ``psutil`` reports as available and divides it
    by ``WORLD_SIZE``.

    Args:
        device: Unused; present so the signature matches the other
            ``get_*_free_memory`` helpers.
        memory_fraction: Unused, for the same reason.

    Returns:
        int: ``available * 0.95 / WORLD_SIZE`` truncated to an integer.
    """
    # Imported lazily so these modules are only needed when this helper runs.
    import psutil
    from text_generation_server.utils.dist import WORLD_SIZE

    available = psutil.virtual_memory().available
    share = available * 0.95 / WORLD_SIZE
    return int(share)


# Probe for Intel Extension for PyTorch once, at import time.
IPEX_AVAIL = is_ipex_available()

# Step 1: decide which backend this process runs on. The checks are ordered:
# a ROCm build of torch wins, then a usable CUDA build, then a usable XPU
# (which additionally requires IPEX), and finally plain CPU.
if torch.version.hip is not None:
    SYSTEM = "rocm"
elif torch.version.cuda is not None and torch.cuda.is_available():
    SYSTEM = "cuda"
elif IPEX_AVAIL and hasattr(torch, "xpu") and torch.xpu.is_available():
    SYSTEM = "xpu"
else:
    SYSTEM = "cpu"

# Step 2: expose the backend-specific helpers under common module-level names.
if SYSTEM in ("rocm", "cuda"):
    # Both the "rocm" and "cuda" backends use the torch.cuda functions.
    empty_cache, synchronize = torch.cuda.empty_cache, torch.cuda.synchronize
    get_free_memory = get_cuda_free_memory
elif SYSTEM == "xpu":
    empty_cache, synchronize = torch.xpu.empty_cache, torch.xpu.synchronize
    get_free_memory = get_xpu_free_memory
else:

    def noop(*args, **kwargs):
        """Accept any arguments and do nothing."""
        pass

    # On CPU, cache clearing and synchronization are bound to the no-op.
    empty_cache = synchronize = noop
    get_free_memory = get_cpu_free_memory

logger.info(f"Detected system {SYSTEM}")