# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

import pytest
import torch
import deepspeed
from deepspeed.git_version_info import torch_info


def skip_on_arch(min_arch=7):
    """Skip the current test when the CUDA device's compute capability is below ``min_arch``."""
    if deepspeed.accelerator.get_accelerator().device_name() == 'cuda':
        if torch.cuda.get_device_capability()[0] < min_arch:  #ignore-cuda
            pytest.skip(f"needs compute capability >= {min_arch}")
    else:
        # Non-CUDA accelerators (only XPU is expected here) are not gated by compute capability.
        assert deepspeed.accelerator.get_accelerator().device_name() == 'xpu'
        return


def skip_on_cuda(valid_cuda):
    """Skip the current test unless the CUDA version, encoded as major * 10 + minor (e.g. 11.7 -> 117), is listed in ``valid_cuda``."""
    split_version = lambda x: map(int, x.split('.')[:2])
    if deepspeed.accelerator.get_accelerator().device_name() == 'cuda':
        CUDA_MAJOR, CUDA_MINOR = split_version(torch_info['cuda_version'])
        CUDA_VERSION = (CUDA_MAJOR * 10) + CUDA_MINOR
        if CUDA_VERSION not in valid_cuda:
            pytest.skip(f"requires one of CUDA versions {valid_cuda}")
    else:
        assert deepspeed.accelerator.get_accelerator().device_name() == 'xpu'
        return
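

# Illustrative usage sketch (hypothetical test, not part of the original helpers):
# the skip helpers above are called at the top of a test body so the test is
# skipped on unsupported hardware or CUDA builds. The argument values below are
# examples only.
def _example_skip_helper_usage():
    skip_on_arch(min_arch=8)  # hypothetical: require compute capability >= 8.0
    skip_on_cuda(valid_cuda=[111, 116, 117])  # hypothetical allow-list: CUDA 11.1, 11.6, 11.7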


def required_torch_version():
    """Return True if the installed torch is version 1.8 or newer."""
    TORCH_MAJOR = int(torch.__version__.split('.')[0])
    TORCH_MINOR = int(torch.__version__.split('.')[1])

    # Compare the major version first so torch 2.x (and later) also passes the 1.8 floor.
    return TORCH_MAJOR > 1 or (TORCH_MAJOR == 1 and TORCH_MINOR >= 8)


def bf16_required_version_check(accelerator_check=True):
    """Return True if the installed torch/NCCL/CUDA stack (and, optionally, the accelerator itself) supports bf16."""
    split_version = lambda x: map(int, x.split('.')[:2])
    TORCH_MAJOR, TORCH_MINOR = split_version(torch_info['version'])
    NCCL_MAJOR, NCCL_MINOR = split_version(torch_info['nccl_version'])
    CUDA_MAJOR, CUDA_MINOR = split_version(torch_info['cuda_version'])

    # Sometimes bf16 tests are runnable even if bf16 is not natively supported by the accelerator
    if accelerator_check:
        accelerator_pass = torch_info['bf16_support']
    else:
        accelerator_pass = True

    # bf16 needs torch >= 1.10, CUDA >= 11 and NCCL >= 2.10.
    torch_pass = TORCH_MAJOR > 1 or (TORCH_MAJOR == 1 and TORCH_MINOR >= 10)
    cuda_pass = CUDA_MAJOR >= 11
    nccl_pass = NCCL_MAJOR > 2 or (NCCL_MAJOR == 2 and NCCL_MINOR >= 10)
    return torch_pass and cuda_pass and nccl_pass and accelerator_pass
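

# Illustrative usage sketch (hypothetical test, not part of this module): the
# bf16 version check above is typically used as a skipif condition.
@pytest.mark.skipif(not bf16_required_version_check(),
                    reason="bf16 needs torch >= 1.10, CUDA >= 11, NCCL >= 2.10 and accelerator support")
def _example_bf16_gated_test():
    pass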


def required_minimum_torch_version(major_version, minor_version):
    TORCH_MAJOR = int(torch.__version__.split('.')[0])
    TORCH_MINOR = int(torch.__version__.split('.')[1])

    if TORCH_MAJOR < major_version:
        return False

    return TORCH_MAJOR > major_version or TORCH_MINOR >= minor_version


def required_maximum_torch_version(major_version, minor_version):
    TORCH_MAJOR = int(torch.__version__.split('.')[0])
    TORCH_MINOR = int(torch.__version__.split('.')[1])

    if TORCH_MAJOR > major_version:
        return False

    return TORCH_MAJOR < major_version or TORCH_MINOR <= minor_version
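

# Illustrative usage sketch (hypothetical test, not part of this module): the
# minimum/maximum helpers combine naturally into a version-window skipif.
@pytest.mark.skipif(not (required_minimum_torch_version(1, 8) and required_maximum_torch_version(2, 0)),
                    reason="test only validated on torch 1.8 - 2.0")
def _example_version_window_test():
    pass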


def required_amp_check():
    # NVIDIA apex is an optional dependency; amp tests can only run when it is installed.
    from importlib.util import find_spec
    return find_spec('apex') is not None
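

# Illustrative usage sketch (hypothetical test, not part of this module): gate
# an apex/amp test on the optional dependency being installed.
@pytest.mark.skipif(not required_amp_check(), reason="apex is not installed")
def _example_amp_gated_test():
    pass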