"""
Copyright 2020 The Microsoft DeepSpeed Team
"""
import os
import torch
import subprocess
from .builder import CUDAOpBuilder


class CPUAdamBuilder(CUDAOpBuilder):
    """Build description for the ``cpu_adam`` op (ROCm/HIP port).

    The class only supplies data to the base builder: which source files to
    compile, which include directories to search, and which flags to pass to
    the host and device compilers.
    """

    # NOTE(review): presumably the environment variable the base builder
    # consults to decide whether to pre-build this op -- confirm in builder.
    BUILD_VAR = "DS_BUILD_CPU_ADAM"
    NAME = "cpu_adam"

    # ROCm install root used when neither ROCM_HOME nor ROCM_PATH is set.
    _DEFAULT_ROCM_HOME = '/opt/rocm-3.9.1'

    # Preprocessor define that selects the non-vectorized code path.
    _SCALAR_FLAG = '-D__SCALAR__'

    def __init__(self):
        super().__init__(name=self.NAME)

    def absolute_name(self):
        """Return the dotted module path of the compiled op."""
        return f'deepspeed.ops.adam.{self.NAME}_op'

    def sources(self, is_rocm_pytorch):
        """Return the source files to compile.

        Args:
            is_rocm_pytorch: truthy to select the HIP sources, falsy to
                select the CUDA sources.
        """
        if is_rocm_pytorch:
            return [
                'csrc/adam/hip/cpu_adam.cpp',
                'csrc/adam/hip/custom_hip_kernel.hip',
            ]
        return ['csrc/adam/cpu_adam.cpp', 'csrc/adam/custom_cuda_kernel.cu']

    def _rocm_home(self):
        """Return the ROCm install root.

        ``ROCM_HOME`` takes precedence over ``ROCM_PATH``; when neither is
        set (or both are empty) the historical hard-coded location is used so
        existing setups keep working.
        """
        return (os.environ.get('ROCM_HOME') or os.environ.get('ROCM_PATH')
                or self._DEFAULT_ROCM_HOME)

    def include_paths(self):
        """Return the include directories: project headers plus ROCm headers."""
        # The trailing '' keeps the trailing slash the original literal had.
        rocm_include = os.path.join(self._rocm_home(), 'include', '')
        return ['csrc/includes/', rocm_include]

    def simd_width(self):
        """Return the ``-D`` define selecting the SIMD code path.

        The CPU is inspected through ``lscpu``. Returns ``-D__AVX512__`` or
        ``-D__AVX256__`` for Intel CPUs that report the matching feature, and
        ``-D__SCALAR__`` in every other case, including when ``lscpu`` is
        missing or fails. A non-empty flag is always returned so that
        ``cxx_args`` never hands an empty argument to the compiler.
        """
        if not self.command_exists('lscpu'):
            self.warning(
                "CPUAdam attempted to query 'lscpu' to detect the existence "
                "of AVX instructions. However, 'lscpu' does not appear to exist on "
                "your system, will fall back to non-vectorized execution.")
            return self._SCALAR_FLAG

        try:
            # argv list instead of a shell string: no shell is needed here.
            raw = subprocess.check_output(['lscpu'])
        except (subprocess.CalledProcessError, OSError) as err:
            self.warning(
                f"CPUAdam failed to run 'lscpu' ({err}), "
                "will fall back to non-vectorized execution.")
            return self._SCALAR_FLAG

        cpu_info = raw.decode('utf-8', errors='replace').strip().lower()

        # NOTE: vectorized paths are only enabled for Intel CPUs; any other
        # vendor gets the scalar path even if it reports AVX support.
        if 'genuineintel' in cpu_info:
            if 'avx512' in cpu_info:
                return '-D__AVX512__'
            if 'avx2' in cpu_info:
                return '-D__AVX256__'
        return self._SCALAR_FLAG

    def cxx_args(self):
        """Return the host C++ compiler/linker flags.

        This is the ROCm flag set. The CUDA flag set it replaced linked
        ``-lcudart`` and ``-lcublas`` from ``CUDA_HOME/lib64`` instead of
        ``-lrocblas`` and ``-lpthread``.
        """
        return [
            '-O3',
            '-std=c++14',
            '-lrocblas',
            '-g',
            '-Wno-reorder',
            '-march=native',
            '-fopenmp',
            '-lpthread',
            self.simd_width(),
        ]

    def nvcc_args(self):
        """Return the flags for the device-code compiler.

        Compared with the CUDA flag set this port disabled,
        ``--use_fast_math``, the ``-U__CUDA_NO_HALF*`` undefines and the
        compute-capability arguments are omitted, while ``-fopenmp`` and
        ``-lpthread`` are added.
        """
        return [
            '-O3',
            '-fopenmp',
            '-lpthread',
            '-std=c++14',
        ]