import_utils.py 1.69 KB
Newer Older
fxmarty's avatar
fxmarty committed
1
import torch
2
from loguru import logger
3
import subprocess
fxmarty's avatar
fxmarty committed
4

Nicolas Patry's avatar
Nicolas Patry committed
5

6
7
8
9
10
11
12
13
def is_xpu_available():
    """Report whether an Intel XPU device can be used through PyTorch.

    Returns:
        ``False`` when ``intel_extension_for_pytorch`` cannot be imported or
        when ``torch`` exposes no ``xpu`` attribute; otherwise the value
        returned by ``torch.xpu.is_available()``.
    """
    try:
        # Imported only as an availability probe; the name is not used below.
        import intel_extension_for_pytorch  # noqa: F401
    except ImportError:
        return False
    else:
        if not hasattr(torch, "xpu"):
            return False
        return torch.xpu.is_available()

Nicolas Patry's avatar
Nicolas Patry committed
14

Nicolas Patry's avatar
Nicolas Patry committed
15
16
17
18
19
20
21
def get_cuda_free_memory(device, memory_fraction):
    """Return how much memory on a CUDA device may still be claimed.

    The value is the free memory reported by ``torch.cuda.mem_get_info`` minus
    the share of the device's total memory that lies outside
    ``memory_fraction``, floored at zero.

    Args:
        device: Device passed through to ``torch.cuda.mem_get_info`` and
            ``torch.cuda.get_device_properties``.
        memory_fraction: Fraction of the device's total memory the caller is
            allowed to use.

    Returns:
        The usable free memory, never below zero.
    """
    currently_free, _ = torch.cuda.mem_get_info(device)
    capacity = torch.cuda.get_device_properties(device).total_memory
    # Share of the card that must stay untouched (outside the allowed fraction).
    off_limits = (1 - memory_fraction) * capacity
    return max(0, currently_free - off_limits)


Wang, Yi's avatar
Wang, Yi committed
22
def get_xpu_free_memory(device, memory_fraction):
    """Estimate the free memory, in bytes, of an Intel XPU device.

    The total memory comes from ``torch.xpu.get_device_properties``; the used
    memory is read from a single ``xpu-smi dump`` sample (metric id 18), whose
    value is treated as MiB. The budget is a fixed 95% of the total memory.

    Args:
        device: Torch device to inspect. Its ``index`` selects the card that
            ``xpu-smi`` queries; when the index is ``None`` the current XPU
            device (``torch.xpu.current_device()``) is used instead.
        memory_fraction: Accepted for signature parity with
            ``get_cuda_free_memory``. It is currently not used: the budget is
            the hard-coded 95% of total memory.

    Returns:
        The estimated free memory as an ``int`` number of bytes, never below
        zero.

    Raises:
        OSError: If the ``xpu-smi`` executable cannot be launched.
        subprocess.CalledProcessError: If ``xpu-smi`` exits with a non-zero
            status.
        IndexError, ValueError: If the ``xpu-smi`` output does not contain a
            parsable second line.
    """
    total_memory = torch.xpu.get_device_properties(device).total_memory

    device_id = device.index
    if device_id is None:
        # An un-indexed device would otherwise be rendered as "-d None".
        device_id = torch.xpu.current_device()

    # Build the argv list directly rather than formatting and splitting a string.
    command = ["xpu-smi", "dump", "-d", str(device_id), "-m", "18", "-n", "1"]
    output = subprocess.check_output(command).decode("utf-8").split("\n")

    # The second output line is parsed (presumably the first data row after a
    # header); its last comma-separated field is the used memory, scaled from
    # MiB to bytes.
    used_memory = float(output[1].split(",")[-1]) * 1024 * 1024

    # Clamp at zero, as get_cuda_free_memory does, so callers never receive a
    # negative amount when usage exceeds the 95% budget.
    free_memory = max(0, int(total_memory * 0.95 - used_memory))
    return free_memory


# Detect the accelerator backend once at import time. The checks run in the
# order ROCm, CUDA, XPU, and fall back to CPU when none of them matches.
SYSTEM = None
if torch.version.hip is not None:
    SYSTEM = "rocm"
elif torch.version.cuda is not None and torch.cuda.is_available():
    SYSTEM = "cuda"
elif is_xpu_available():
    SYSTEM = "xpu"
else:
    SYSTEM = "cpu"

# Bind the backend-specific helpers under backend-neutral module-level names.
if SYSTEM in ("rocm", "cuda"):
    # ROCm is bound to the same torch.cuda entry points as CUDA.
    empty_cache = torch.cuda.empty_cache
    synchronize = torch.cuda.synchronize
    get_free_memory = get_cuda_free_memory
elif SYSTEM == "xpu":
    empty_cache = torch.xpu.empty_cache
    synchronize = torch.xpu.synchronize
    get_free_memory = get_xpu_free_memory
else:

    def noop(*args, **kwargs):
        """Accept any arguments, do nothing, and return ``None``."""
        pass

    # On CPU all three helpers are stand-ins that do nothing.
    empty_cache = noop
    synchronize = noop
    get_free_memory = noop

logger.info(f"Detected system {SYSTEM}")