Unverified Commit 7b8be2a7 authored by Olatunji Ruwase, committed by GitHub

Disable default installation of CPU Adam (#450)

* Disable default installation of CPU Adam

* Handle cpufeature import/use errors separately
parent 6f28ea30
@@ -15,7 +15,6 @@ import collections
 from deepspeed.runtime.fp16.loss_scaler import LossScaler, DynamicLossScaler
 from deepspeed.runtime.utils import see_memory_usage, is_model_parallel_parameter
 from deepspeed.runtime.zero.config import ZERO_OPTIMIZATION_GRADIENTS
-from deepspeed.ops.adam import DeepSpeedCPUAdam
 from deepspeed.utils import logger
 #Toggle this to true to enable correctness test
@@ -1416,6 +1415,7 @@ class FP16_DeepSpeedZeroOptimizer(object):
         #torch.set_num_threads(12)
         timers('optimizer_step').start()
         if self.deepspeed_adam_offload:
+            from deepspeed.ops.adam import DeepSpeedCPUAdam
             self.optimizer.step(fp16_param_groups=self.parallel_partitioned_fp16_groups)
             #self.optimizer.step()
             #for fp16_partitions, fp32_partition in zip(self.parallel_partitioned_fp16_groups, self.single_partition_of_fp32_groups):
...
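Moving the DeepSpeedCPUAdam import from module scope into the offload branch means the ZeRO optimizer module can still be imported when the cpu-adam extension was never compiled; the import (and any resulting error) now only happens if CPU Adam offload is actually enabled. A minimal sketch of the same lazy-import pattern, with the class and attribute names simplified for illustration:

    class OffloadAwareOptimizer:
        def __init__(self, deepspeed_adam_offload=False):
            # No module-level dependency on the optional compiled op.
            self.deepspeed_adam_offload = deepspeed_adam_offload

        def step(self):
            if self.deepspeed_adam_offload:
                # Imported only when offload is in use, so installs without
                # the cpu-adam build never touch the missing extension.
                from deepspeed.ops.adam import DeepSpeedCPUAdam  # noqa: F401
                ...  # offloaded optimizer step would go here
            else:
                ...  # regular GPU optimizer step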
@@ -13,7 +13,6 @@ import torch
 import shutil
 import subprocess
 import warnings
-import cpufeature
 from setuptools import setup, find_packages
 from torch.utils.cpp_extension import CUDAExtension, BuildExtension, CppExtension
@@ -25,6 +24,27 @@ def fetch_requirements(path):
         return [r.strip() for r in fd.readlines()]
 
+
+def available_vector_instructions():
+    try:
+        import cpufeature
+    except ImportError:
+        warnings.warn(
+            'import cpufeature failed - CPU vector optimizations are not available for CPUAdam'
+        )
+        return {}
+    cpu_vector_instructions = {}
+    try:
+        cpu_vector_instructions = cpufeature.CPUFeature
+    except Exception:
+        warnings.warn(
+            'cpufeature.CPUFeature failed - CPU vector optimizations are not available for CPUAdam'
+        )
+        return {}
+    return cpu_vector_instructions
+
 install_requires = fetch_requirements('requirements/requirements.txt')
 dev_requires = fetch_requirements('requirements/requirements-dev.txt')
 sparse_attn_requires = fetch_requirements('requirements/requirements-sparse-attn.txt')
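The new helper wraps both the import of cpufeature and the CPUFeature lookup, so a missing or broken cpufeature package degrades to an empty dict instead of aborting setup.py. A short sketch of the intended behavior (the feature keys shown are the ones this diff queries later; the example is illustrative only):

    features = available_vector_instructions()
    # With cpufeature installed this is a dict of CPU capabilities,
    # e.g. {'AVX2': True, 'AVX512f': False, ...}; if the import or the
    # lookup fails, a warning is emitted and {} is returned instead.
    print('AVX2:', features.get('AVX2', False))
    print('AVX512f:', features.get('AVX512f', False))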
@@ -43,29 +63,26 @@ TRANSFORMER = "transformer"
 SPARSE_ATTN = "sparse-attn"
 CPU_ADAM = "cpu-adam"
 
+cpu_vector_instructions = available_vector_instructions()
+
 # Build environment variables for custom builds
 DS_BUILD_LAMB_MASK = 1
 DS_BUILD_TRANSFORMER_MASK = 10
 DS_BUILD_SPARSE_ATTN_MASK = 100
 DS_BUILD_CPU_ADAM_MASK = 1000
-DS_BUILD_AVX512_MASK = 10000
 
 # Allow for build_cuda to turn on or off all ops
-DS_BUILD_ALL_OPS = DS_BUILD_LAMB_MASK | DS_BUILD_TRANSFORMER_MASK | DS_BUILD_SPARSE_ATTN_MASK | DS_BUILD_CPU_ADAM_MASK | DS_BUILD_AVX512_MASK
+DS_BUILD_ALL_OPS = DS_BUILD_LAMB_MASK | DS_BUILD_TRANSFORMER_MASK | DS_BUILD_SPARSE_ATTN_MASK | DS_BUILD_CPU_ADAM_MASK
 DS_BUILD_CUDA = int(os.environ.get('DS_BUILD_CUDA', 1)) * DS_BUILD_ALL_OPS
 
 # Set default of each op based on if build_cuda is set
 OP_DEFAULT = DS_BUILD_CUDA == DS_BUILD_ALL_OPS
-DS_BUILD_CPU_ADAM = int(os.environ.get('DS_BUILD_CPU_ADAM',
-                                       OP_DEFAULT)) * DS_BUILD_CPU_ADAM_MASK
+DS_BUILD_CPU_ADAM = int(os.environ.get('DS_BUILD_CPU_ADAM', 0)) * DS_BUILD_CPU_ADAM_MASK
 DS_BUILD_LAMB = int(os.environ.get('DS_BUILD_LAMB', OP_DEFAULT)) * DS_BUILD_LAMB_MASK
 DS_BUILD_TRANSFORMER = int(os.environ.get('DS_BUILD_TRANSFORMER',
                                           OP_DEFAULT)) * DS_BUILD_TRANSFORMER_MASK
 DS_BUILD_SPARSE_ATTN = int(os.environ.get('DS_BUILD_SPARSE_ATTN',
                                           OP_DEFAULT)) * DS_BUILD_SPARSE_ATTN_MASK
-DS_BUILD_AVX512 = int(os.environ.get(
-    'DS_BUILD_AVX512',
-    cpufeature.CPUFeature['AVX512f'])) * DS_BUILD_AVX512_MASK
 
 # Final effective mask is the bitwise OR of each op
 BUILD_MASK = (DS_BUILD_LAMB | DS_BUILD_TRANSFORMER | DS_BUILD_SPARSE_ATTN
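Because DS_BUILD_CUDA still defaults to 1, OP_DEFAULT stays true and the other ops keep building by default; only cpu-adam now falls back to 0. A small worked sketch of that arithmetic, assuming none of the DS_BUILD_* environment variables are set:

    import os

    DS_BUILD_LAMB_MASK = 1
    DS_BUILD_CPU_ADAM_MASK = 1000
    OP_DEFAULT = True  # DS_BUILD_CUDA defaults to 1, so DS_BUILD_CUDA == DS_BUILD_ALL_OPS

    # Other ops fall back to OP_DEFAULT and are built...
    DS_BUILD_LAMB = int(os.environ.get('DS_BUILD_LAMB', OP_DEFAULT)) * DS_BUILD_LAMB_MASK
    # ...while cpu-adam falls back to 0 and is skipped unless requested explicitly,
    # e.g. by exporting DS_BUILD_CPU_ADAM=1 before running the install.
    DS_BUILD_CPU_ADAM = int(os.environ.get('DS_BUILD_CPU_ADAM', 0)) * DS_BUILD_CPU_ADAM_MASK

    print(DS_BUILD_LAMB)      # 1
    print(DS_BUILD_CPU_ADAM)  # 0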
@@ -111,11 +128,10 @@ if (TORCH_MAJOR > 1) or (TORCH_MAJOR == 1 and TORCH_MINOR > 4):
     version_ge_1_5 = ['-DVERSION_GE_1_5']
 version_dependent_macros = version_ge_1_1 + version_ge_1_3 + version_ge_1_5
 
-cpu_info = cpufeature.CPUFeature
 SIMD_WIDTH = ''
-if cpu_info['AVX512f'] and DS_BUILD_AVX512:
+if cpu_vector_instructions.get('AVX512f', False):
     SIMD_WIDTH = '-D__AVX512__'
-elif cpu_info['AVX2']:
+elif cpu_vector_instructions.get('AVX2', False):
     SIMD_WIDTH = '-D__AVX256__'
 print("SIMD_WIDTH = ", SIMD_WIDTH)
...
@@ -7,10 +7,10 @@ import pytest
 import copy
 import deepspeed
-from deepspeed.ops.adam import DeepSpeedCPUAdam
 
 if not deepspeed.ops.__installed_ops__['cpu-adam']:
     pytest.skip("cpu-adam is not installed", allow_module_level=True)
+else:
+    from deepspeed.ops.adam import DeepSpeedCPUAdam
 
 def check_equal(first, second, atol=1e-2, verbose=False):
...
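Deferring the DeepSpeedCPUAdam import until after the installed-ops check keeps this test module importable on installs that skipped the cpu-adam build; pytest.skip(..., allow_module_level=True) then marks every test in the file as skipped rather than erroring during collection. A minimal sketch of the same guard applied to a hypothetical optional op (the key 'my-optional-op' and the imported module name are invented for illustration):

    import pytest
    import deepspeed

    # Skip the whole module when the optional extension was not built.
    if not deepspeed.ops.__installed_ops__.get('my-optional-op', False):
        pytest.skip("my-optional-op is not installed", allow_module_level=True)
    else:
        from deepspeed.ops import my_optional_op  # hypothetical import

    def test_optional_op_is_available():
        assert my_optional_op is not None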