"src/vscode:/vscode.git/clone" did not exist on "bae04ea9d891e22de9c66247d2429afebfa45af1"
setup.py 13.5 KB
Newer Older
Jeff Rasley's avatar
Jeff Rasley committed
1
2
3
4
5
6
7
8
9
10
"""
Copyright 2020 The Microsoft DeepSpeed Team

DeepSpeed library

Create a new wheel via the following command: python setup.py bdist_wheel

The wheel will be located at: dist/*.whl
"""

11
import os
Jeff Rasley's avatar
Jeff Rasley committed
12
import torch
13
import shutil
14
15
import subprocess
import warnings
Jeff Rasley's avatar
Jeff Rasley committed
16
from setuptools import setup, find_packages
17
18
19
20
21
22
23
24
25
26
from torch.utils.cpp_extension import CUDAExtension, BuildExtension, CppExtension

VERSION = "0.3.0"


def fetch_requirements(path):
    with open(path, 'r') as fd:
        return [r.strip() for r in fd.readlines()]


27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
def available_vector_instructions():
    try:
        import cpufeature
    except ImportError:
        warnings.warn(
            f'import cpufeature failed - CPU vector optimizations are not available for CPUAdam'
        )
        return {}

    cpu_vector_instructions = {}
    try:
        cpu_vector_instructions = cpufeature.CPUFeature
    except _:
        warnings.warn(
            f'cpufeature.CPUFeature failed - CPU vector optimizations are not available for CPUAdam'
        )
        return {}

    return cpu_vector_instructions


48
49
50
51
install_requires = fetch_requirements('requirements/requirements.txt')
dev_requires = fetch_requirements('requirements/requirements-dev.txt')
sparse_attn_requires = fetch_requirements('requirements/requirements-sparse-attn.txt')

52
# If MPI is available add 1bit-adam requirements
53
if torch.cuda.is_available():
54
55
56
57
58
    if shutil.which('ompi_info') or shutil.which('mpiname'):
        onebit_adam_requires = fetch_requirements(
            'requirements/requirements-1bit-adam.txt')
        onebit_adam_requires.append(f"cupy-cuda{torch.version.cuda.replace('.','')[:3]}")
        install_requires += onebit_adam_requires
59

Jeff Rasley's avatar
Jeff Rasley committed
60
61
62
63
# Constants for each op
LAMB = "lamb"
TRANSFORMER = "transformer"
SPARSE_ATTN = "sparse-attn"
64
CPU_ADAM = "cpu-adam"
Jeff Rasley's avatar
Jeff Rasley committed
65

66
67
cpu_vector_instructions = available_vector_instructions()

68
69
70
71
# Build environment variables for custom builds
DS_BUILD_LAMB_MASK = 1
DS_BUILD_TRANSFORMER_MASK = 10
DS_BUILD_SPARSE_ATTN_MASK = 100
72
DS_BUILD_CPU_ADAM_MASK = 1000
73
74

# Allow for build_cuda to turn on or off all ops
75
DS_BUILD_ALL_OPS = DS_BUILD_LAMB_MASK | DS_BUILD_TRANSFORMER_MASK | DS_BUILD_SPARSE_ATTN_MASK | DS_BUILD_CPU_ADAM_MASK
76
77
78
79
DS_BUILD_CUDA = int(os.environ.get('DS_BUILD_CUDA', 1)) * DS_BUILD_ALL_OPS

# Set default of each op based on if build_cuda is set
OP_DEFAULT = DS_BUILD_CUDA == DS_BUILD_ALL_OPS
80
DS_BUILD_CPU_ADAM = int(os.environ.get('DS_BUILD_CPU_ADAM', 0)) * DS_BUILD_CPU_ADAM_MASK
81
82
83
84
DS_BUILD_LAMB = int(os.environ.get('DS_BUILD_LAMB', OP_DEFAULT)) * DS_BUILD_LAMB_MASK
DS_BUILD_TRANSFORMER = int(os.environ.get('DS_BUILD_TRANSFORMER',
                                          OP_DEFAULT)) * DS_BUILD_TRANSFORMER_MASK
DS_BUILD_SPARSE_ATTN = int(os.environ.get('DS_BUILD_SPARSE_ATTN',
85
                                          OP_DEFAULT)) * DS_BUILD_SPARSE_ATTN_MASK
86
87

# Final effective mask is the bitwise OR of each op
Jeff Rasley's avatar
Jeff Rasley committed
88
BUILD_MASK = (DS_BUILD_LAMB | DS_BUILD_TRANSFORMER | DS_BUILD_SPARSE_ATTN
89
              | DS_BUILD_CPU_ADAM)
90

91
install_ops = dict.fromkeys([LAMB, TRANSFORMER, SPARSE_ATTN, CPU_ADAM], False)
92
if BUILD_MASK & DS_BUILD_LAMB:
Jeff Rasley's avatar
Jeff Rasley committed
93
    install_ops[LAMB] = True
94
95
if BUILD_MASK & DS_BUILD_CPU_ADAM:
    install_ops[CPU_ADAM] = True
96
if BUILD_MASK & DS_BUILD_TRANSFORMER:
Jeff Rasley's avatar
Jeff Rasley committed
97
    install_ops[TRANSFORMER] = True
98
if BUILD_MASK & DS_BUILD_SPARSE_ATTN:
Jeff Rasley's avatar
Jeff Rasley committed
99
    install_ops[SPARSE_ATTN] = True
100
101
102
if len(install_ops) == 0:
    print("Building without any cuda/cpp extensions")
print(f'BUILD_MASK={BUILD_MASK}, install_ops={install_ops}')
Jeff Rasley's avatar
Jeff Rasley committed
103
104

cmdclass = {}
Jeff Rasley's avatar
Jeff Rasley committed
105
cmdclass['build_ext'] = BuildExtension.with_options(use_ninja=False)
Jeff Rasley's avatar
Jeff Rasley committed
106

107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
TORCH_MAJOR = int(torch.__version__.split('.')[0])
TORCH_MINOR = int(torch.__version__.split('.')[1])

if not torch.cuda.is_available():
    # Fix to allow docker buils, similar to https://github.com/NVIDIA/apex/issues/486
    print(
        "[WARNING] Torch did not find cuda available, if cross-compling or running with cpu only "
        "you can ignore this message. Adding compute capability for Pascal, Volta, and Turing "
        "(compute capabilities 6.0, 6.1, 6.2)")
    if os.environ.get("TORCH_CUDA_ARCH_LIST", None) is None:
        os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5"

# Fix from apex that might be relevant for us as well, related to https://github.com/NVIDIA/apex/issues/456
version_ge_1_1 = []
if (TORCH_MAJOR > 1) or (TORCH_MAJOR == 1 and TORCH_MINOR > 0):
    version_ge_1_1 = ['-DVERSION_GE_1_1']
version_ge_1_3 = []
if (TORCH_MAJOR > 1) or (TORCH_MAJOR == 1 and TORCH_MINOR > 2):
    version_ge_1_3 = ['-DVERSION_GE_1_3']
version_ge_1_5 = []
if (TORCH_MAJOR > 1) or (TORCH_MAJOR == 1 and TORCH_MINOR > 4):
    version_ge_1_5 = ['-DVERSION_GE_1_5']
version_dependent_macros = version_ge_1_1 + version_ge_1_3 + version_ge_1_5

Jeff Rasley's avatar
Jeff Rasley committed
131
SIMD_WIDTH = ''
132
if cpu_vector_instructions.get('AVX512f', False):
Jeff Rasley's avatar
Jeff Rasley committed
133
    SIMD_WIDTH = '-D__AVX512__'
134
elif cpu_vector_instructions.get('AVX2', False):
Jeff Rasley's avatar
Jeff Rasley committed
135
136
137
    SIMD_WIDTH = '-D__AVX256__'
print("SIMD_WIDTH = ", SIMD_WIDTH)

138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
ext_modules = []

## Lamb ##
if BUILD_MASK & DS_BUILD_LAMB:
    ext_modules.append(
        CUDAExtension(name='deepspeed.ops.lamb.fused_lamb_cuda',
                      sources=[
                          'csrc/lamb/fused_lamb_cuda.cpp',
                          'csrc/lamb/fused_lamb_cuda_kernel.cu'
                      ],
                      include_dirs=['csrc/includes'],
                      extra_compile_args={
                          'cxx': [
                              '-O3',
                          ] + version_dependent_macros,
                          'nvcc': ['-O3',
                                   '--use_fast_math'] + version_dependent_macros
                      }))

Jeff Rasley's avatar
Jeff Rasley committed
157
## Adam ##
158
if BUILD_MASK & DS_BUILD_CPU_ADAM:
Jeff Rasley's avatar
Jeff Rasley committed
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
    ext_modules.append(
        CUDAExtension(name='deepspeed.ops.adam.cpu_adam_op',
                      sources=[
                          'csrc/adam/cpu_adam.cpp',
                          'csrc/adam/custom_cuda_kernel.cu',
                      ],
                      include_dirs=['csrc/includes',
                                    '/usr/local/cuda/include'],
                      extra_compile_args={
                          'cxx': [
                              '-O3',
                              '-std=c++14',
                              '-L/usr/local/cuda/lib64',
                              '-lcudart',
                              '-lcublas',
                              '-g',
                              '-Wno-reorder',
                              '-march=native',
                              '-fopenmp',
                              SIMD_WIDTH
                          ],
                          'nvcc': [
                              '-O3',
                              '--use_fast_math',
                              '-gencode',
184
                              'arch=compute_61,code=compute_61',
Jeff Rasley's avatar
Jeff Rasley committed
185
186
187
188
189
190
191
192
193
                              '-gencode',
                              'arch=compute_70,code=compute_70',
                              '-std=c++14',
                              '-U__CUDA_NO_HALF_OPERATORS__',
                              '-U__CUDA_NO_HALF_CONVERSIONS__',
                              '-U__CUDA_NO_HALF2_OPERATORS__'
                          ]
                      }))

194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
## Transformer ##
if BUILD_MASK & DS_BUILD_TRANSFORMER:
    ext_modules.append(
        CUDAExtension(name='deepspeed.ops.transformer.transformer_cuda',
                      sources=[
                          'csrc/transformer/ds_transformer_cuda.cpp',
                          'csrc/transformer/cublas_wrappers.cu',
                          'csrc/transformer/transform_kernels.cu',
                          'csrc/transformer/gelu_kernels.cu',
                          'csrc/transformer/dropout_kernels.cu',
                          'csrc/transformer/normalize_kernels.cu',
                          'csrc/transformer/softmax_kernels.cu',
                          'csrc/transformer/general_kernels.cu'
                      ],
                      include_dirs=['csrc/includes'],
                      extra_compile_args={
                          'cxx': ['-O3',
                                  '-std=c++14',
                                  '-g',
                                  '-Wno-reorder'],
                          'nvcc': [
                              '-O3',
                              '--use_fast_math',
                              '-gencode',
                              'arch=compute_61,code=compute_61',
                              '-gencode',
                              'arch=compute_70,code=compute_70',
221
                              '-std=c++14',
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
                              '-U__CUDA_NO_HALF_OPERATORS__',
                              '-U__CUDA_NO_HALF_CONVERSIONS__',
                              '-U__CUDA_NO_HALF2_OPERATORS__'
                          ]
                      }))
    ext_modules.append(
        CUDAExtension(name='deepspeed.ops.transformer.stochastic_transformer_cuda',
                      sources=[
                          'csrc/transformer/ds_transformer_cuda.cpp',
                          'csrc/transformer/cublas_wrappers.cu',
                          'csrc/transformer/transform_kernels.cu',
                          'csrc/transformer/gelu_kernels.cu',
                          'csrc/transformer/dropout_kernels.cu',
                          'csrc/transformer/normalize_kernels.cu',
                          'csrc/transformer/softmax_kernels.cu',
                          'csrc/transformer/general_kernels.cu'
                      ],
                      include_dirs=['csrc/includes'],
                      extra_compile_args={
                          'cxx': ['-O3',
                                  '-std=c++14',
                                  '-g',
                                  '-Wno-reorder'],
                          'nvcc': [
                              '-O3',
                              '--use_fast_math',
                              '-gencode',
                              'arch=compute_61,code=compute_61',
                              '-gencode',
                              'arch=compute_70,code=compute_70',
252
                              '-std=c++14',
253
254
255
256
257
258
259
260
261
                              '-U__CUDA_NO_HALF_OPERATORS__',
                              '-U__CUDA_NO_HALF_CONVERSIONS__',
                              '-U__CUDA_NO_HALF2_OPERATORS__',
                              '-D__STOCHASTIC_MODE__'
                          ]
                      }))


def command_exists(cmd):
Jeff Rasley's avatar
Jeff Rasley committed
262
263
264
265
266
267
268
269
270
    if '|' in cmd:
        cmds = cmd.split("|")
    else:
        cmds = [cmd]
    valid = False
    for cmd in cmds:
        result = subprocess.Popen(f'type {cmd}', stdout=subprocess.PIPE, shell=True)
        valid = valid or result.wait() == 0
    return valid
271
272
273
274
275


## Sparse transformer ##
if BUILD_MASK & DS_BUILD_SPARSE_ATTN:
    # Check to see if llvm and cmake are installed since they are dependencies
Jeff Rasley's avatar
Jeff Rasley committed
276
    required_commands = ['llvm-config|llvm-config-9', 'cmake']
277
278
279
280
281
282
283
284
285

    command_status = list(map(command_exists, required_commands))
    if not all(command_status):
        zipped_status = list(zip(required_commands, command_status))
        warnings.warn(
            f'Missing non-python requirements, please install the missing packages: {zipped_status}'
        )
        warnings.warn(
            'Skipping sparse attention installation due to missing required packages')
Jeff Rasley's avatar
Jeff Rasley committed
286
287
        # remove from installed ops list
        install_ops[SPARSE_ATTN] = False
288
289
290
291
292
293
294
295
296
297
    elif TORCH_MAJOR == 1 and TORCH_MINOR >= 5:
        ext_modules.append(
            CppExtension(name='deepspeed.ops.sparse_attention.cpp_utils',
                         sources=['csrc/sparse_attention/utils.cpp'],
                         extra_compile_args={'cxx': ['-O2',
                                                     '-fopenmp']}))
        # Add sparse attention requirements
        install_requires += sparse_attn_requires
    else:
        warnings.warn('Unable to meet requirements to install sparse attention')
Jeff Rasley's avatar
Jeff Rasley committed
298
299
        # remove from installed ops list
        install_ops[SPARSE_ATTN] = False
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315

# Add development requirements
install_requires += dev_requires

# Write out version/git info
git_hash_cmd = "git rev-parse --short HEAD"
git_branch_cmd = "git rev-parse --abbrev-ref HEAD"
if command_exists('git'):
    result = subprocess.check_output(git_hash_cmd, shell=True)
    git_hash = result.decode('utf-8').strip()
    result = subprocess.check_output(git_branch_cmd, shell=True)
    git_branch = result.decode('utf-8').strip()
else:
    git_hash = "unknown"
    git_branch = "unknown"
print(f"version={VERSION}+{git_hash}, git_hash={git_hash}, git_branch={git_branch}")
316
with open('deepspeed/git_version_info_installed.py', 'w') as fd:
317
318
319
    fd.write(f"version='{VERSION}+{git_hash}'\n")
    fd.write(f"git_hash='{git_hash}'\n")
    fd.write(f"git_branch='{git_branch}'\n")
Jeff Rasley's avatar
Jeff Rasley committed
320
    fd.write(f"installed_ops={install_ops}\n")
321
322

print(f'install_requires={install_requires}')
Jeff Rasley's avatar
Jeff Rasley committed
323
324

setup(name='deepspeed',
325
      version=f"{VERSION}+{git_hash}",
Jeff Rasley's avatar
Jeff Rasley committed
326
327
328
      description='DeepSpeed library',
      author='DeepSpeed Team',
      author_email='deepspeed@microsoft.com',
329
      url='http://deepspeed.ai',
330
      install_requires=install_requires,
Jeff Rasley's avatar
Jeff Rasley committed
331
332
333
      packages=find_packages(exclude=["docker",
                                      "third_party",
                                      "csrc"]),
334
      package_data={'deepspeed.ops.sparse_attention.trsrc': ['*.tr']},
Jeff Rasley's avatar
Jeff Rasley committed
335
      scripts=['bin/deepspeed',
336
337
338
               'bin/deepspeed.pt',
               'bin/ds',
               'bin/ds_ssh'],
339
340
341
342
343
344
      classifiers=[
          'Programming Language :: Python :: 3.6',
          'Programming Language :: Python :: 3.7',
          'Programming Language :: Python :: 3.8'
      ],
      license='MIT',
Jeff Rasley's avatar
Jeff Rasley committed
345
346
      ext_modules=ext_modules,
      cmdclass=cmdclass)