Unverified Commit 21a77242 authored by Sourab Mangrulkar's avatar Sourab Mangrulkar Committed by GitHub
Browse files

Migrate HFDeepSpeedConfig from trfrs to accelerate (#17623)



* Migrate HFDeepSpeedConfig from trfrs to accelerate

* add `accelerate` to testing dep

* addressing comments

* addressing comments

Using `_shared_state` and avoiding object creation. This is necessary as `notebook_launcher` in `launchers.py` checks `len(AcceleratorState._shared_state)>0` to throw an error.

* resolving comments

1. Use simple API from accelerate to manage the deepspeed config integration
2. Update the related documentation

* reverting changes and addressing comments

* docstring correction

* addressing nits

* addressing nits

* addressing nits 3

* bumping up the accelerate version to 0.10.0

* resolving import

* update setup.py to include deepspeed dependencies

* Update dependency_versions_table.py

* fixing imports

* reverting changes to CI dependencies for "run_tests_pipelines_tf*" tests

These changes didn't help with resolving the failures and I believe this needs to be addressed in another PR.

* removing `accelerate` as hard dependency

Resolves issues related to CI Tests

* adding `accelerate` as dependency for building docs

resolves failure in Build PR Documentation test

* adding `accelerate` as dependency in "dev" to resolve doc build issue

* resolving comments

1. adding `accelerate` to extras["all"]
2. Including check for accelerate too before import HFDeepSpeedConfig from there
Co-Authored-By: default avatarSylvain Gugger <35901082+sgugger@users.noreply.github.com>

* resolving comments
Co-authored-by: default avatarSylvain Gugger <35901082+sgugger@users.noreply.github.com>
parent e44a569f
...@@ -97,7 +97,7 @@ if stale_egg_info.exists(): ...@@ -97,7 +97,7 @@ if stale_egg_info.exists():
# 2. once modified, run: `make deps_table_update` to update src/transformers/dependency_versions_table.py # 2. once modified, run: `make deps_table_update` to update src/transformers/dependency_versions_table.py
_deps = [ _deps = [
"Pillow", "Pillow",
"accelerate>=0.9.0", "accelerate>=0.10.0",
"black~=22.0,>=22.3", "black~=22.0,>=22.3",
"codecarbon==1.2.0", "codecarbon==1.2.0",
"cookiecutter==1.7.3", "cookiecutter==1.7.3",
...@@ -242,6 +242,7 @@ extras["tf"] = deps_list("tensorflow", "onnxconverter-common", "tf2onnx") ...@@ -242,6 +242,7 @@ extras["tf"] = deps_list("tensorflow", "onnxconverter-common", "tf2onnx")
extras["tf-cpu"] = deps_list("tensorflow-cpu", "onnxconverter-common", "tf2onnx") extras["tf-cpu"] = deps_list("tensorflow-cpu", "onnxconverter-common", "tf2onnx")
extras["torch"] = deps_list("torch") extras["torch"] = deps_list("torch")
extras["accelerate"] = deps_list("accelerate")
if os.name == "nt": # windows if os.name == "nt": # windows
extras["retrieval"] = deps_list("datasets") # faiss is not supported on windows extras["retrieval"] = deps_list("datasets") # faiss is not supported on windows
...@@ -257,7 +258,7 @@ extras["onnx"] = deps_list("onnxconverter-common", "tf2onnx") + extras["onnxrunt ...@@ -257,7 +258,7 @@ extras["onnx"] = deps_list("onnxconverter-common", "tf2onnx") + extras["onnxrunt
extras["modelcreation"] = deps_list("cookiecutter") extras["modelcreation"] = deps_list("cookiecutter")
extras["sagemaker"] = deps_list("sagemaker") extras["sagemaker"] = deps_list("sagemaker")
extras["deepspeed"] = deps_list("deepspeed") extras["deepspeed"] = deps_list("deepspeed") + extras["accelerate"]
extras["fairscale"] = deps_list("fairscale") extras["fairscale"] = deps_list("fairscale")
extras["optuna"] = deps_list("optuna") extras["optuna"] = deps_list("optuna")
extras["ray"] = deps_list("ray[tune]") extras["ray"] = deps_list("ray[tune]")
...@@ -293,9 +294,9 @@ extras["testing"] = ( ...@@ -293,9 +294,9 @@ extras["testing"] = (
"nltk", "nltk",
"GitPython", "GitPython",
"hf-doc-builder", "hf-doc-builder",
"protobuf", # Can be removed once we can unpin protobuf "protobuf", # Can be removed once we can unpin protobuf
"sacremoses", "sacremoses",
"rjieba" "rjieba",
) )
+ extras["retrieval"] + extras["retrieval"]
+ extras["modelcreation"] + extras["modelcreation"]
...@@ -316,6 +317,7 @@ extras["all"] = ( ...@@ -316,6 +317,7 @@ extras["all"] = (
+ extras["integrations"] + extras["integrations"]
+ extras["timm"] + extras["timm"]
+ extras["codecarbon"] + extras["codecarbon"]
+ extras["accelerate"]
) )
# Might need to add doc-builder and some specific deps in the future # Might need to add doc-builder and some specific deps in the future
...@@ -325,8 +327,8 @@ extras["docs_specific"] = ["hf-doc-builder"] ...@@ -325,8 +327,8 @@ extras["docs_specific"] = ["hf-doc-builder"]
extras["docs"] = extras["all"] + extras["docs_specific"] extras["docs"] = extras["all"] + extras["docs_specific"]
extras["dev-torch"] = ( extras["dev-torch"] = (
extras['testing'] extras["testing"]
+ extras['torch'] + extras["torch"]
+ extras["sentencepiece"] + extras["sentencepiece"]
+ extras["tokenizers"] + extras["tokenizers"]
+ extras["torch-speech"] + extras["torch-speech"]
...@@ -342,17 +344,17 @@ extras["dev-torch"] = ( ...@@ -342,17 +344,17 @@ extras["dev-torch"] = (
+ extras["onnxruntime"] + extras["onnxruntime"]
) )
extras["dev-tensorflow"] = ( extras["dev-tensorflow"] = (
extras['testing'] extras["testing"]
+ extras['tf'] + extras["tf"]
+ extras["sentencepiece"] + extras["sentencepiece"]
+ extras["tokenizers"] + extras["tokenizers"]
+ extras["vision"] + extras["vision"]
+ extras["quality"] + extras["quality"]
+ extras["docs_specific"] + extras["docs_specific"]
+ extras["sklearn"] + extras["sklearn"]
+ extras["modelcreation"] + extras["modelcreation"]
+ extras["onnx"] + extras["onnx"]
+ extras["tf-speech"] + extras["tf-speech"]
) )
extras["dev"] = ( extras["dev"] = (
extras["all"] extras["all"]
......
...@@ -16,14 +16,12 @@ Integration with Deepspeed ...@@ -16,14 +16,12 @@ Integration with Deepspeed
""" """
import importlib.util import importlib.util
import io
import json
import weakref import weakref
from copy import deepcopy from copy import deepcopy
from functools import partialmethod from functools import partialmethod
from .dependency_versions_check import dep_version_check from .dependency_versions_check import dep_version_check
from .utils import is_torch_available, logging from .utils import is_accelerate_available, is_torch_available, logging
if is_torch_available(): if is_torch_available():
...@@ -36,7 +34,15 @@ def is_deepspeed_available(): ...@@ -36,7 +34,15 @@ def is_deepspeed_available():
return importlib.util.find_spec("deepspeed") is not None return importlib.util.find_spec("deepspeed") is not None
class HfDeepSpeedConfig: if is_accelerate_available() and is_deepspeed_available():
from accelerate.utils.deepspeed import HfDeepSpeedConfig as DeepSpeedConfig
else:
# Inherits from a dummy `object` if accelerate is not available, so that Python can still import this file.
# Deepspeed glue code will never inherit this dummy object as it checks if accelerate is available.
from builtins import object as DeepSpeedConfig
class HfDeepSpeedConfig(DeepSpeedConfig):
""" """
This object contains a DeepSpeed configuration dictionary and can be quickly queried for things like zero stage. This object contains a DeepSpeed configuration dictionary and can be quickly queried for things like zero stage.
...@@ -56,108 +62,9 @@ class HfDeepSpeedConfig: ...@@ -56,108 +62,9 @@ class HfDeepSpeedConfig:
def __init__(self, config_file_or_dict): def __init__(self, config_file_or_dict):
# set global weakref object # set global weakref object
set_hf_deepspeed_config(self) set_hf_deepspeed_config(self)
dep_version_check("accelerate")
dep_version_check("deepspeed") dep_version_check("deepspeed")
super().__init__(config_file_or_dict)
if isinstance(config_file_or_dict, dict):
# Don't modify user's data should they want to reuse it (e.g. in tests), because once we
# modified it, it will not be accepted here again, since `auto` values would have been overridden
config = deepcopy(config_file_or_dict)
elif isinstance(config_file_or_dict, str):
with io.open(config_file_or_dict, "r", encoding="utf-8") as f:
config = json.load(f)
else:
raise ValueError("expecting either a path to a DeepSpeed config file or a pre-populated dict")
self.config = config
# zero stage - this is done as early as possible, before model is created, to allow
# ``is_deepspeed_zero3_enabled`` query and getting to the early deepspeed config object
# during ``zero.Init()`` which needs to know the dtype, and some other hparams.
self._stage = self.get_value("zero_optimization.stage", -1)
# offload
self._offload = False
if self.is_zero2() or self.is_zero3():
offload_devices_valid = set(["cpu", "nvme"])
offload_devices = set(
[
self.get_value("zero_optimization.offload_optimizer.device"),
self.get_value("zero_optimization.offload_param.device"),
]
)
if len(offload_devices & offload_devices_valid) > 0:
self._offload = True
def find_config_node(self, ds_key_long):
    """Walk the nested config dict along a dotted key path.

    Returns a ``(parent_dict, leaf_key)`` pair, where ``parent_dict`` is the
    dict that would contain the final key, or ``(None, leaf_key)`` when any
    intermediate node along the path is missing.
    """
    *branch, leaf = ds_key_long.split(".")
    node = self.config
    for part in branch:
        node = node.get(part)
        if node is None:
            return None, leaf
    return node, leaf
def get_value(self, ds_key_long, default=None):
    """
    Returns the set value or `default` if no value is set
    """
    parent, leaf = self.find_config_node(ds_key_long)
    if parent is None:
        return default
    return parent.get(leaf, default)
def del_config_sub_tree(self, ds_key_long, must_exist=False):
    """
    Deletes a sub-section of the config file if it's found.

    Unless `must_exist` is `True` the section doesn't have to exist.
    """
    keys = ds_key_long.split(".")
    parent = None
    current = self.config
    # descend the dotted path, remembering the parent of each step
    for key in keys:
        parent, current = current, current.get(key)
        if current is None:
            if must_exist:
                raise ValueError(f"Can't find {ds_key_long} entry in the config: {self.config}")
            return
    # the full path resolved: detach the leaf node from its parent
    if parent is not None:
        parent.pop(keys[-1])
def is_true(self, ds_key_long):
    """
    Returns `True`/`False` only if the value is set, always `False` otherwise. So use this method to ask the very
    specific question of whether the value is set to `True` (and it's not set to `False` or isn't set).
    """
    value = self.get_value(ds_key_long)
    return value is not None and bool(value)
def is_false(self, ds_key_long):
    """
    Returns `True`/`False` only if the value is set, always `False` otherwise. So use this method to ask the very
    specific question of whether the value is set to `False` (and it's not set to `True` or isn't set).
    """
    value = self.get_value(ds_key_long)
    return value is not None and not bool(value)
def is_zero2(self):
    """Whether ZeRO optimization stage 2 is configured."""
    return self._stage == 2
def is_zero3(self):
    """Whether ZeRO optimization stage 3 is configured."""
    return self._stage == 3
def is_offload(self):
    """Whether optimizer/param offload (cpu or nvme) was detected in the config."""
    return self._offload
class HfTrainerDeepSpeedConfig(HfDeepSpeedConfig): class HfTrainerDeepSpeedConfig(HfDeepSpeedConfig):
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
# 2. run `make deps_table_update`` # 2. run `make deps_table_update``
deps = { deps = {
"Pillow": "Pillow", "Pillow": "Pillow",
"accelerate": "accelerate>=0.9.0", "accelerate": "accelerate>=0.10.0",
"black": "black~=22.0,>=22.3", "black": "black~=22.0,>=22.3",
"codecarbon": "codecarbon==1.2.0", "codecarbon": "codecarbon==1.2.0",
"cookiecutter": "cookiecutter==1.7.3", "cookiecutter": "cookiecutter==1.7.3",
......
...@@ -35,6 +35,7 @@ from .utils import ( ...@@ -35,6 +35,7 @@ from .utils import (
ExplicitEnum, ExplicitEnum,
cached_property, cached_property,
get_full_repo_name, get_full_repo_name,
is_accelerate_available,
is_sagemaker_dp_enabled, is_sagemaker_dp_enabled,
is_sagemaker_mp_enabled, is_sagemaker_mp_enabled,
is_torch_available, is_torch_available,
...@@ -1163,6 +1164,8 @@ class TrainingArguments: ...@@ -1163,6 +1164,8 @@ class TrainingArguments:
if self.deepspeed: if self.deepspeed:
# - must be run very last in arg parsing, since it will use a lot of these settings. # - must be run very last in arg parsing, since it will use a lot of these settings.
# - must be run before the model is created. # - must be run before the model is created.
if not is_accelerate_available():
raise ValueError("--deepspeed requires Accelerate to be installed: `pip install accelerate`.")
from transformers.deepspeed import HfTrainerDeepSpeedConfig from transformers.deepspeed import HfTrainerDeepSpeedConfig
# will be used later by the Trainer # will be used later by the Trainer
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment