"src/array/cuda/utils.hip" did not exist on "62af41c245d924e7fe0837b34ebc9f6cfa613594"
Unverified Commit 0dc84200 authored by Jeff Rasley, committed by GitHub

Dependency pruning (#528)

* remove cpufeature dependency

* remove psutil requirement
parent d779bd53
@@ -3,11 +3,17 @@ Copyright 2019 The Microsoft DeepSpeed Team
 '''
 import time
-import psutil
 import torch
 from deepspeed.utils import logger
+
+try:
+    import psutil
+    PSUTILS_INSTALLED = True
+except ImportError:
+    PSUTILS_INSTALLED = False
+    pass


 def print_rank_0(message):
     if torch.distributed.is_initialized():
@@ -103,7 +109,7 @@ class ThroughputTimer():
                  num_workers,
                  start_step=2,
                  steps_per_output=50,
-                 monitor_memory=True,
+                 monitor_memory=False,
                  logging_fn=None):
         self.start_time = 0
         self.end_time = 0
@@ -124,6 +130,9 @@ class ThroughputTimer():
         self.logging = logger.info
         self.initialized = False

+        if self.monitor_memory and not PSUTILS_INSTALLED:
+            raise ImportError("Unable to import 'psutils', please install package")
+
     def update_epoch_count(self):
         self.epoch_count += 1
         self.local_step_count = 0
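With this change psutil becomes optional: the import is wrapped in try/except, memory monitoring is off by default, and ThroughputTimer raises an ImportError only when monitor_memory=True is requested on a system without psutil. A minimal sketch of the same guarded-import pattern, using a hypothetical report_memory helper (not part of this diff) for illustration:

# Sketch of the optional-psutil pattern above; report_memory is a
# hypothetical helper for illustration, not part of DeepSpeed.
try:
    import psutil
    PSUTILS_INSTALLED = True
except ImportError:
    PSUTILS_INSTALLED = False


def report_memory(prefix=''):
    if not PSUTILS_INSTALLED:
        raise ImportError("Unable to import 'psutil', please install the package")
    vm = psutil.virtual_memory()  # namedtuple with total/used/percent fields
    print(f"{prefix} mem used: {vm.used / 2**30:.2f} GiB ({vm.percent}%)")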
 import os
 import torch
-import warnings
+import subprocess
 from .builder import CUDAOpBuilder
@@ -21,35 +21,26 @@ class CPUAdamBuilder(CUDAOpBuilder):
         CUDA_INCLUDE = os.path.join(torch.utils.cpp_extension.CUDA_HOME, "include")
         return ['csrc/includes', CUDA_INCLUDE]

-    def available_vector_instructions(self):
-        try:
-            import cpufeature
-        except ImportError:
-            warnings.warn(
-                f'import cpufeature failed - CPU vector optimizations are not available for CPUAdam'
-            )
-            return {}
-
-        cpu_vector_instructions = {}
-        try:
-            cpu_vector_instructions = cpufeature.CPUFeature
-        except _:
-            warnings.warn(
-                f'cpufeature.CPUFeature failed - CPU vector optimizations are not available for CPUAdam'
-            )
-            return {}
-        return cpu_vector_instructions
+    def simd_width(self):
+        if not self.command_exists('lscpu'):
+            self.warning(
+                "CPUAdam attempted to query 'lscpu' to detect the existence "
+                "of AVX instructions. However, 'lscpu' does not appear to exist on "
+                "your system, will fall back to non-vectorized execution.")
+            return ''
+
+        result = subprocess.check_output('lscpu', shell=True)
+        result = result.decode('utf-8').strip().lower()
+        if 'genuineintel' in result:
+            if 'avx512' in result:
+                return '-D__AVX512__'
+            elif 'avx2' in result:
+                return '-D__AVX256__'
+        return ''

     def cxx_args(self):
         CUDA_LIB64 = os.path.join(torch.utils.cpp_extension.CUDA_HOME, "lib64")
-        cpu_info = self.available_vector_instructions()
-        SIMD_WIDTH = ''
-        if 'Intel' in cpu_info.get('VendorId', ''):
-            if cpu_info.get('AVX512f', False):
-                SIMD_WIDTH = '-D__AVX512__'
-            elif cpu_info.get('AVX2', False):
-                SIMD_WIDTH = '-D__AVX256__'
+        SIMD_WIDTH = self.simd_width()
         return [
             '-O3',
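The new simd_width() helper drops the cpufeature dependency and instead parses lscpu output: on GenuineIntel CPUs it maps AVX-512 support to -D__AVX512__, AVX2 to -D__AVX256__, and otherwise returns an empty string so compilation falls back to non-vectorized code. A standalone sketch of the same probe, written here with shutil.which in place of the builder's command_exists helper:

# Illustrative stand-alone version of the lscpu-based SIMD probe above;
# shutil.which replaces the builder's command_exists helper.
import shutil
import subprocess


def detect_simd_flag():
    if shutil.which('lscpu') is None:
        return ''  # lscpu missing: fall back to non-vectorized execution
    result = subprocess.check_output('lscpu', shell=True)
    result = result.decode('utf-8').strip().lower()
    if 'genuineintel' in result:
        if 'avx512' in result:
            return '-D__AVX512__'
        if 'avx2' in result:
            return '-D__AVX256__'
    return ''

# Usage: append the flag (if any) to the compiler arguments.
# flag = detect_simd_flag()
# cxx_args = ['-O3'] + ([flag] if flag else [])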
 torch>=1.2
 torchvision>=0.4.0
 tqdm
-psutil
 tensorboardX==1.8
 ninja
-cpufeature
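Since psutil and cpufeature no longer appear in requirements.txt, neither package is installed with DeepSpeed; users who want ThroughputTimer's memory monitoring must install psutil themselves (pip install psutil), while CPU feature detection now relies on lscpu, which ships with util-linux on most Linux distributions.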