"""
Copyright 2020 The Microsoft DeepSpeed Team
"""
import os
import torch
import subprocess

from .builder import CUDAOpBuilder


class CPUAdamBuilder(CUDAOpBuilder):
    """Op builder for the CPU Adam optimizer extension (ROCm variant).

    Compiles the fused CPU Adam kernels, selecting AVX512 / AVX256 / scalar
    code paths at build time based on the host CPU reported by ``lscpu``.
    """

    BUILD_VAR = "DS_BUILD_CPU_ADAM"
    NAME = "cpu_adam"

    def __init__(self):
        super().__init__(name=self.NAME)

    def absolute_name(self):
        """Return the fully-qualified module name the built op is imported as."""
        return f'deepspeed.ops.adam.{self.NAME}_op'

    def sources(self, is_rocm_pytorch):
        """Return the list of source files to compile.

        Args:
            is_rocm_pytorch: True when building against a ROCm PyTorch,
                selecting the HIP-ported sources instead of the CUDA ones.
        """
        if is_rocm_pytorch:
            return [
                'csrc/adam/hip/cpu_adam.cpp',
                'csrc/adam/hip/custom_hip_kernel.hip'
            ]
        else:
            return ['csrc/adam/cpu_adam.cpp', 'csrc/adam/custom_cuda_kernel.cu']

    def include_paths(self):
        """Return include directories for compilation.

        TODO(review): the ROCm include path is hardcoded to a specific
        installation (/opt/rocm-3.9.1); this breaks on any other ROCm
        version or install prefix — derive it from the environment
        (e.g. ROCM_PATH) or torch.utils.cpp_extension instead.
        """
        return ['csrc/includes/', '/opt/rocm-3.9.1/include/']

    def simd_width(self):
        """Detect the widest SIMD instruction set available on the host CPU.

        Queries ``lscpu`` and returns the matching preprocessor define
        ('-D__AVX512__', '-D__AVX256__', or '-D__SCALAR__'). Returns an
        empty string (no define) when ``lscpu`` is unavailable, falling
        back to non-vectorized execution.
        """
        if not self.command_exists('lscpu'):
            self.warning(
                "CPUAdam attempted to query 'lscpu' to detect the existence "
                "of AVX instructions. However, 'lscpu' does not appear to exist on "
                "your system, will fall back to non-vectorized execution.")
            return ''
        # List-argument form avoids an unnecessary shell invocation
        # (identical output to `shell=True` for a bare command name).
        result = subprocess.check_output(['lscpu'])
        result = result.decode('utf-8').strip().lower()
        if 'genuineintel' in result:
            if 'avx512' in result:
                return '-D__AVX512__'
            elif 'avx2' in result:
                return '-D__AVX256__'
        # Non-Intel CPUs and Intel CPUs without AVX2/AVX512 use scalar code.
        return '-D__SCALAR__'

    def cxx_args(self):
        """Return C++ compiler flags for the ROCm build.

        Links against rocBLAS and enables OpenMP plus the SIMD define
        chosen by :meth:`simd_width`.
        """
        SIMD_WIDTH = self.simd_width()
        return [
            '-O3',
            '-std=c++14',
            '-lrocblas',
            '-g',
            '-Wno-reorder',
            '-march=native',
            '-fopenmp',
            '-lpthread',
            SIMD_WIDTH
        ]

    def nvcc_args(self):
        """Return device-compiler flags (hipcc in the ROCm build)."""
        args = [
            '-O3',
            '-fopenmp',
            '-lpthread',
            '-std=c++14',
        ]
        return args