transformer.py 1.97 KB
Newer Older
Samyam Rajbhandari's avatar
Samyam Rajbhandari committed
1
2
3
"""
Copyright 2020 The Microsoft DeepSpeed Team
"""
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
import torch
from .builder import CUDAOpBuilder


class TransformerBuilder(CUDAOpBuilder):
    """Builder description for the ``transformer`` op.

    Supplies the op name, the list of source files to compile (a HIP set or
    a CUDA set, selected by the caller) and the compiler flags.
    """

    # NOTE(review): presumably the environment variable the base builder
    # checks to decide whether to pre-build this op -- confirm in `.builder`.
    BUILD_VAR = "DS_BUILD_TRANSFORMER"
    NAME = "transformer"

    def __init__(self, name=None):
        """Initialise the builder.

        Args:
            name: Op name handed to the base class; ``NAME`` is used when
                it is ``None``.
        """
        name = self.NAME if name is None else name
        super().__init__(name=name)

    def absolute_name(self):
        """Return the dotted module path of the op, built from ``NAME``."""
        return f'deepspeed.ops.transformer.{self.NAME}_op'

    def sources(self, is_rocm_pytorch):
        """Return the source files that make up the op.

        Args:
            is_rocm_pytorch: Truthy to get the HIP sources under
                ``csrc/transformer/hip``; falsy to get the CUDA sources
                under ``csrc/transformer``.

        Returns:
            A new list of source path strings.
        """
        if is_rocm_pytorch:
            # normalize_kernels.hip and softmax_kernels.hip are deliberately
            # absent from this list: they are marked as not supported for
            # the HIP build.
            return [
                'csrc/transformer/hip/ds_transformer_hip.cpp',
                'csrc/transformer/hip/cublas_wrappers.hip',
                'csrc/transformer/hip/transform_kernels.hip',
                'csrc/transformer/hip/gelu_kernels.hip',
                'csrc/transformer/hip/dropout_kernels.hip',
                'csrc/transformer/hip/general_kernels.hip'
            ]
        else:
            return [
                'csrc/transformer/ds_transformer_cuda.cpp',
                'csrc/transformer/cublas_wrappers.cu',
                'csrc/transformer/transform_kernels.cu',
                'csrc/transformer/gelu_kernels.cu',
                'csrc/transformer/dropout_kernels.cu',
                'csrc/transformer/normalize_kernels.cu',
                'csrc/transformer/softmax_kernels.cu',
                'csrc/transformer/general_kernels.cu'
            ]

    def include_paths(self):
        """Return the include directories needed by the sources."""
        return ['csrc/includes']

    def nvcc_args(self):
        """Return the flags passed to the device compiler.

        The following flags are intentionally left disabled in this builder:
        ``--use_fast_math``, ``-U__CUDA_NO_HALF_OPERATORS__``,
        ``-U__CUDA_NO_HALF_CONVERSIONS__`` and
        ``-U__CUDA_NO_HALF2_OPERATORS__``.
        """
        args = [
            '-O3',
            '-std=c++14',
        ]

        # The result of self.compute_capability_args() is deliberately not
        # appended here.
        return args

    def cxx_args(self):
        """Return the flags passed to the host C++ compiler."""
        return ['-O3', '-std=c++14', '-g', '-Wno-reorder', '-Wno-c++11-narrowing']