"src/vscode:/vscode.git/clone" did not exist on "7447f75b9f8badb073636ed163417b0947c59e9f"
setup.py 5.35 KB
Newer Older
Jeff Rasley's avatar
Jeff Rasley committed
1
2
3
4
5
6
7
8
9
10
"""
Copyright 2020 The Microsoft DeepSpeed Team

DeepSpeed library

Create a new wheel via the following command: python setup.py bdist_wheel

The wheel will be located at: dist/*.whl
"""

11
import os
Jeff Rasley's avatar
Jeff Rasley committed
12
13
14
15
16
17
18
import torch
from setuptools import setup, find_packages
from torch.utils.cpp_extension import CUDAExtension, BuildExtension

# Custom command classes handed to setup(): the 'build_ext' command is
# served by torch's BuildExtension.
cmdclass = {'build_ext': BuildExtension}

19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# Major/minor components of the installed torch version; they select the
# version-dependent compile macros assembled below.
TORCH_MAJOR = int(torch.__version__.split('.')[0])
TORCH_MINOR = int(torch.__version__.split('.')[1])

# Compute capabilities used when torch cannot see a CUDA device and the user
# has not set TORCH_CUDA_ARCH_LIST. The warning text is derived from this
# value so the message and the exported list cannot drift apart.
_DEFAULT_CUDA_ARCH_LIST = "6.0;6.1;6.2;7.0;7.5"

if not torch.cuda.is_available():
    # Fix to allow docker builds, similar to https://github.com/NVIDIA/apex/issues/486
    _cuda_warning = (
        "[WARNING] Torch did not find cuda available, if cross-compiling or running with cpu only "
        "you can ignore this message.")
    if os.environ.get("TORCH_CUDA_ARCH_LIST", None) is None:
        # Only announce (and apply) the default list when nothing was set;
        # a user-provided TORCH_CUDA_ARCH_LIST is left untouched.
        _cuda_warning += (
            " Adding compute capability for Pascal, Volta, and Turing "
            "(compute capabilities {})".format(", ".join(
                _DEFAULT_CUDA_ARCH_LIST.split(";"))))
        os.environ["TORCH_CUDA_ARCH_LIST"] = _DEFAULT_CUDA_ARCH_LIST
    print(_cuda_warning)

# Fix from apex that might be relevant for us as well, related to https://github.com/NVIDIA/apex/issues/456
# Each list holds a single -D define when the installed torch is at least the
# named version, and is empty otherwise.
_torch_version = (TORCH_MAJOR, TORCH_MINOR)
version_ge_1_1 = ['-DVERSION_GE_1_1'] if _torch_version >= (1, 1) else []
version_ge_1_3 = ['-DVERSION_GE_1_3'] if _torch_version >= (1, 3) else []
version_ge_1_5 = ['-DVERSION_GE_1_5'] if _torch_version >= (1, 5) else []
version_dependent_macros = version_ge_1_1 + version_ge_1_3 + version_ge_1_5

43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
def _transformer_extension(name, extra_nvcc_args=()):
    """Build a CUDAExtension for one variant of the transformer kernels.

    Both transformer variants compile the same sources with the same flags;
    a variant may append extra nvcc arguments (e.g. a -D define).

    Args:
        name: name of the extension module to produce.
        extra_nvcc_args: additional arguments appended to the nvcc flag list.

    Returns:
        The configured CUDAExtension.
    """
    return CUDAExtension(
        name=name,
        sources=[
            'csrc/transformer/ds_transformer_cuda.cpp',
            'csrc/transformer/cublas_wrappers.cu',
            'csrc/transformer/transform_kernels.cu',
            'csrc/transformer/gelu_kernels.cu',
            'csrc/transformer/dropout_kernels.cu',
            'csrc/transformer/normalize_kernels.cu',
            'csrc/transformer/softmax_kernels.cu',
            'csrc/transformer/general_kernels.cu',
        ],
        include_dirs=['csrc/includes'],
        extra_compile_args={
            'cxx': ['-O3',
                    '-std=c++14',
                    '-g',
                    '-Wno-reorder'],
            'nvcc': [
                '-O3',
                '--use_fast_math',
                '-gencode',
                'arch=compute_61,code=compute_61',
                '-gencode',
                'arch=compute_70,code=compute_70',
                '-std=c++14',
                '-U__CUDA_NO_HALF_OPERATORS__',
                '-U__CUDA_NO_HALF_CONVERSIONS__',
                '-U__CUDA_NO_HALF2_OPERATORS__',
            ] + list(extra_nvcc_args),
        })


# CUDA extensions built with the package: the fused LAMB optimizer kernels and
# two builds of the transformer kernels (the second adds -D__STOCHASTIC_MODE__).
ext_modules = [
    CUDAExtension(
        name='deepspeed_lamb_cuda',
        sources=['csrc/lamb/fused_lamb_cuda.cpp',
                 'csrc/lamb/fused_lamb_cuda_kernel.cu'],
        include_dirs=['csrc/includes'],
        extra_compile_args={
            'cxx': [
                '-O3',
            ] + version_dependent_macros,
            'nvcc': ['-O3',
                     '--use_fast_math'] + version_dependent_macros
        }),
    _transformer_extension('deepspeed_transformer_cuda'),
    _transformer_extension('deepspeed_stochastic_transformer_cuda',
                           extra_nvcc_args=['-D__STOCHASTIC_MODE__']),
]
Jeff Rasley's avatar
Jeff Rasley committed
118
119

# Package metadata and build configuration. ext_modules and cmdclass are the
# module-level values defined earlier in this file.
setup(
    name='deepspeed',
    version='0.2.0',
    description='DeepSpeed library',
    author='DeepSpeed Team',
    author_email='deepspeed@microsoft.com',
    url='http://aka.ms/deepspeed',
    # Directories excluded from package discovery.
    packages=find_packages(exclude=["docker",
                                    "third_party",
                                    "csrc"]),
    # Launcher scripts installed alongside the package.
    scripts=[
        'bin/deepspeed',
        'bin/deepspeed.pt',
        'bin/ds',
        'bin/ds_ssh',
    ],
    classifiers=['Programming Language :: Python :: 3.6'],
    ext_modules=ext_modules,
    cmdclass=cmdclass,
)