""" Copyright 2020 The Microsoft DeepSpeed Team """ import torch from .builder import CUDAOpBuilder class TransformerBuilder(CUDAOpBuilder): BUILD_VAR = "DS_BUILD_TRANSFORMER" NAME = "transformer" def __init__(self, name=None): name = self.NAME if name is None else name super().__init__(name=name) def absolute_name(self): return f'deepspeed.ops.transformer.{self.NAME}_op' def sources(self, is_rocm_pytorch): if is_rocm_pytorch: return [ 'csrc/transformer/hip/ds_transformer_hip.cpp', 'csrc/transformer/hip/cublas_wrappers.hip', 'csrc/transformer/hip/transform_kernels.hip', 'csrc/transformer/hip/gelu_kernels.hip', 'csrc/transformer/hip/dropout_kernels.hip', ###don't support #'csrc/transformer/hip/normalize_kernels.hip', #'csrc/transformer/hip/softmax_kernels.hip', 'csrc/transformer/hip/general_kernels.hip' ] else: return [ 'csrc/transformer/ds_transformer_cuda.cpp', 'csrc/transformer/cublas_wrappers.cu', 'csrc/transformer/transform_kernels.cu', 'csrc/transformer/gelu_kernels.cu', 'csrc/transformer/dropout_kernels.cu', 'csrc/transformer/normalize_kernels.cu', 'csrc/transformer/softmax_kernels.cu', 'csrc/transformer/general_kernels.cu' ] def include_paths(self): return ['csrc/includes'] def nvcc_args(self): args = [ '-O3', #'--use_fast_math', '-std=c++14', #'-U__CUDA_NO_HALF_OPERATORS__', #'-U__CUDA_NO_HALF_CONVERSIONS__', #'-U__CUDA_NO_HALF2_OPERATORS__' ] return args #+ self.compute_capability_args() def cxx_args(self): return ['-O3', '-std=c++14', '-g', '-Wno-reorder', '-Wno-c++11-narrowing']