Commit c25a91b6 authored by aiss

Merge branch 'ds-v0.9.2-rocm' into 'main'

Ds v0.9.2 rocm

See merge request dcutoolkit/deeplearing/deepspeed!2
parents d1596c94 af82b300
...@@ -7,7 +7,7 @@
# top-level repo folders
/.github/ @jeffra @mrwyattii @loadams
/azure/ @jeffra @awan-10
/benchmarks/ @jeffra @awan-10 @mrwyattii @molly-smith
/bin/ @jeffra
......
Subproject commit 20ea07a2a069696abec212e25476a9bf76aced70
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
[![License Apache 2.0](https://badgen.net/badge/license/apache2.0/blue)](https://github.com/Microsoft/DeepSpeed/blob/master/LICENSE)
[![PyPI version](https://badge.fury.io/py/deepspeed.svg)](https://pypi.org/project/deepspeed/)
[![Downloads](https://pepy.tech/badge/deepspeed)](https://pepy.tech/project/deepspeed)
[![Build](https://badgen.net/badge/build/check-status/blue)](#build-pipeline-status)
[![Twitter](https://img.shields.io/twitter/follow/MSFTDeepSpeed)](https://twitter.com/intent/follow?screen_name=MSFTDeepSpeed)
<div align="center">
...@@ -10,9 +11,11 @@
</div>
## Latest News
<b> <span style="color:orange" > DeepSpeed empowers ChatGPT-like model training with a single click, offering 15x speedup over SOTA RLHF systems with unprecedented cost reduction at all scales; [learn how](https://github.com/microsoft/DeepSpeed/tree/master/blogs/deepspeed-chat)</span>.</b>
* ***[2023/04] 🚀 [DeepSpeed Chat: Easy, Fast and Affordable RLHF Training of ChatGPT-like Models at All Scales](https://github.com/microsoft/DeepSpeed/tree/master/blogs/deepspeed-chat)*** [[English](https://github.com/microsoft/DeepSpeed/tree/master/blogs/deepspeed-chat/README.md)] [[中文](https://github.com/microsoft/DeepSpeed/tree/master/blogs/deepspeed-chat/chinese/README.md)] [[日本語](https://github.com/microsoft/DeepSpeed/tree/master/blogs/deepspeed-chat/japanese/README.md)]🚀
* [2023/03] [Scaling Large-Scale Generative Mixture-of-Expert Multimodal Model With VL-MoE](https://www.deepspeed.ai/2023/03/30/multi-modal.html)
* [2023/02] [Automatic Tensor Parallelism: Enables tensor parallelism by default without an injection policy](https://www.deepspeed.ai/tutorials/automatic-tensor-parallelism/)
* [2022/12] [DeepSpeed Data Efficiency: A composable library that makes better use of data, increases training efficiency, and improves model quality](https://www.deepspeed.ai/2022/12/11/data-efficiency.html)
* [2022/11] [Stable Diffusion Image Generation under 1 second w. DeepSpeed MII](https://github.com/microsoft/DeepSpeed-MII/tree/main/examples/benchmark/txt2img)
* [2022/10] [DeepSpeed-MII: instant speedup on 24,000+ open-source DL models with up to 40x cheaper inference](https://www.deepspeed.ai/2022/10/10/mii.html)
...@@ -23,7 +26,7 @@
# Extreme Speed and Scale for DL Training and Inference
***[DeepSpeed](https://www.deepspeed.ai/) enables the world's most powerful language models, such as [MT-530B](https://www.microsoft.com/en-us/research/blog/using-deepspeed-and-megatron-to-train-megatron-turing-nlg-530b-the-worlds-largest-and-most-powerful-generative-language-model/) and [BLOOM](https://huggingface.co/blog/bloom-megatron-deepspeed)***. It is an easy-to-use deep learning optimization software suite that powers unprecedented scale and speed for both training and inference. With DeepSpeed you can:
* Train/Inference dense or sparse models with billions or trillions of parameters
* Achieve excellent system throughput and efficiently scale to thousands of GPUs
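In practice, most of these capabilities are reached through `deepspeed.initialize`, which wraps an existing PyTorch model, optimizer, and JSON config. The sketch below is illustrative only: the tiny model and the `ds_config.json` file are hypothetical, and the exact `initialize` signature should be checked against the DeepSpeed documentation for your version.

```python
# Minimal DeepSpeed training sketch (assumes a hypothetical ds_config.json,
# e.g. {"train_batch_size": 8}).
import torch
import deepspeed

model = torch.nn.Linear(512, 512)                      # stand-in for a real network
opt = torch.optim.Adam(model.parameters(), lr=1e-3)

engine, optimizer, _, _ = deepspeed.initialize(model=model, optimizer=opt, config="ds_config.json")

x = torch.randn(8, 512, device=engine.device)
loss = engine(x).pow(2).mean()
engine.backward(loss)   # replaces loss.backward(); handles scaling/partitioning internally
engine.step()           # replaces optimizer.step()
```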
...@@ -94,8 +97,8 @@ DeepSpeed has been integrated with several different popular open-source DL fram
| ---------------------------------------------------------------------------------------------- | -------------------------------------------- |
| <img src="docs/assets/images/transformers-light.png#gh-light-mode-only" width="250px"><img src="docs/assets/images/transformers-dark.png#gh-dark-mode-only" width="250px"> | [Transformers with DeepSpeed](https://huggingface.co/docs/transformers/main/main_classes/deepspeed) |
| <img src="docs/assets/images/accelerate-light.png#gh-light-mode-only" width="250px"><img src="docs/assets/images/accelerate-dark.png#gh-dark-mode-only" width="250px"> | [Accelerate with DeepSpeed](https://huggingface.co/docs/accelerate/usage_guides/deepspeed) |
| <img src="docs/assets/images/lightning-light.svg#gh-light-mode-only" width="200px"><img src="docs/assets/images/lightning-dark.svg#gh-dark-mode-only" width="200px"> | [Lightning with DeepSpeed](https://lightning.ai/docs/pytorch/stable/advanced/model_parallel.html#deepspeed) |
| <img src="docs/assets/images/mosaicml.svg" width="200px"> | [MosaicML with DeepSpeed](https://docs.mosaicml.com/projects/composer/en/latest/trainer/using_the_trainer.html?highlight=deepspeed#deepspeed-integration) |
| <img src="docs/assets/images/determined.svg" width="225px"> | [Determined with DeepSpeed](https://docs.determined.ai/latest/training/apis-howto/deepspeed/overview.html) |
---
...@@ -104,11 +107,12 @@ DeepSpeed has been integrated with several different popular open-source DL fram
| Description | Status |
| ----------- | ------ |
| NVIDIA | [![nv-torch19-p40](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-torch19-p40.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-torch19-p40.yml) [![nv-torch19-v100](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-torch19-v100.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-torch19-v100.yml) [![nv-torch-latest-v100](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-torch-latest-v100.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-torch-latest-v100.yml) [![nv-inference](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-inference.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-inference.yml) [![nv-nightly](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-nightly.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-nightly.yml) |
| AMD | [![amd-mi100](https://github.com/microsoft/DeepSpeed/actions/workflows/amd-mi100.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/amd-mi100.yml) [![amd-mi200](https://github.com/microsoft/DeepSpeed/actions/workflows/amd-mi200.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/amd-mi200.yml) |
| CPU | [![nv-torch-latest-cpu](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-torch-latest-cpu.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-torch-latest-cpu.yml) |
| PyTorch Nightly | [![nv-torch-nightly-v100](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-torch-nightly-v100.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-torch-nightly-v100.yml) |
| Integrations | [![nv-transformers-v100](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-transformers-v100.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-transformers-v100.yml) [![nv-lightning-v100](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-lightning-v100.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-lightning-v100.yml) [![nv-accelerate-v100](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-accelerate-v100.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-accelerate-v100.yml) [![nv-megatron](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-megatron.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-megatron.yml) [![nv-mii](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-mii.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-mii.yml) |
| Misc | [![Formatting](https://github.com/microsoft/DeepSpeed/actions/workflows/formatting.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/formatting.yml) [![pages-build-deployment](https://github.com/microsoft/DeepSpeed/actions/workflows/pages/pages-build-deployment/badge.svg)](https://github.com/microsoft/DeepSpeed/actions/workflows/pages/pages-build-deployment) [![Documentation Status](https://readthedocs.org/projects/deepspeed/badge/?version=latest)](https://deepspeed.readthedocs.io/en/latest/?badge=latest)[![python](https://github.com/microsoft/DeepSpeed/actions/workflows/python.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/python.yml) |
# Installation
...@@ -122,7 +126,7 @@ dynamically link them at runtime.
## Requirements
* [PyTorch](https://pytorch.org/) must be installed _before_ installing DeepSpeed.
* For full feature support we recommend a version of PyTorch that is >= 1.9 and ideally the latest PyTorch stable release.
* A CUDA or ROCm compiler such as [nvcc](https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/#introduction) or [hipcc](https://github.com/ROCm-Developer-Tools/HIPCC) used to compile C++/CUDA/HIP extensions.
* The specific GPUs we develop and test against are listed below. This doesn't mean your GPU won't work if it isn't in this list; it simply means DeepSpeed is most thoroughly tested on the following (a quick environment check is sketched after this list):
  * NVIDIA: Pascal, Volta, Ampere, and Hopper architectures
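A quick way to verify these requirements before installing is a short PyTorch check. The snippet below is a minimal sketch (not part of the DeepSpeed README); note that `torch.version.hip` is only populated on ROCm builds, hence the `getattr` guard.

```python
# Pre-install sanity check: PyTorch version and a usable CUDA/ROCm device.
import torch

print("torch:", torch.__version__)                  # >= 1.9 recommended
print("GPU available:", torch.cuda.is_available())  # True on working CUDA and ROCm setups
print("CUDA:", torch.version.cuda, "| HIP:", getattr(torch.version, "hip", None))
```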
...@@ -174,7 +178,12 @@ All DeepSpeed documentation, tutorials, and blogs can be found on our website: [
# Contributing
DeepSpeed welcomes your contributions! Please see our
[contributing](CONTRIBUTING.md) guide for more details on formatting, testing,
etc.<br/>
Thanks so much to all of our amazing contributors!
<a href="https://github.com/microsoft/DeepSpeed/graphs/contributors">
<img src="https://contrib.rocks/image?repo=microsoft/DeepSpeed&r=" width="800px"/>
</a>
## Contributor License Agreement
This project welcomes contributions and suggestions. Most contributions require you to
...@@ -210,6 +219,10 @@ Conduct](https://opensource.microsoft.com/codeofconduct/). For more information
14. Reza Yazdani Aminabadi, Samyam Rajbhandari, Minjia Zhang, Ammar Ahmad Awan, Cheng Li, Du Li, Elton Zheng, Jeff Rasley, Shaden Smith, Olatunji Ruwase, Yuxiong He. (2022) DeepSpeed Inference: Enabling Efficient Inference of Transformer Models at Unprecedented Scale. [arXiv:2207.00032](https://arxiv.org/abs/2207.00032) and [SC 2022](https://dl.acm.org/doi/abs/10.5555/3571885.3571946).
15. Zhewei Yao, Xiaoxia Wu, Conglong Li, Connor Holmes, Minjia Zhang, Cheng Li, Yuxiong He. (2022) Random-LTD: Random and Layerwise Token Dropping Brings Efficient Training for Large-scale Transformers. [arXiv:2211.11586](https://arxiv.org/abs/2211.11586).
16. Conglong Li, Zhewei Yao, Xiaoxia Wu, Minjia Zhang, Yuxiong He. (2022) DeepSpeed Data Efficiency: Improving Deep Learning Model Quality and Training Efficiency via Efficient Data Sampling and Routing. [arXiv:2212.03597](https://arxiv.org/abs/2212.03597).
17. Xiaoxia Wu, Cheng Li, Reza Yazdani Aminabadi, Zhewei Yao, Yuxiong He. (2023) Understanding INT4 Quantization for Transformer Models: Latency Speedup, Composability, and Failure Cases. [arXiv:2301.12017](https://arxiv.org/abs/2301.12017).
18. Syed Zawad, Cheng Li, Zhewei Yao, Elton Zheng, Yuxiong He, Feng Yan. (2023) DySR: Adaptive Super-Resolution via Algorithm and System Co-design. [ICLR:2023](https://openreview.net/forum?id=Pgtn4l6eKjv).
19. Sheng Shen, Zhewei Yao, Chunyuan Li, Trevor Darrell, Kurt Keutzer, Yuxiong He. (2023) Scaling Vision-Language Models with Sparse Mixture of Experts. [arXiv:2303.07226](https://arxiv.org/abs/2303.07226).
20. Quentin Anthony, Ammar Ahmad Awan, Jeff Rasley, Yuxiong He, Aamir Shafi, Mustafa Abduljabbar, Hari Subramoni, Dhabaleswar Panda. (2023) MCR-DL: Mix-and-Match Communication Runtime for Deep Learning [arXiv:2303.08374](https://arxiv.org/abs/2303.08374) and will appear at IPDPS 2023.
# Videos
......
# DeepSpeed
## Installation
DeepSpeed supports:
+ Python 3.7
+ Python 3.8
+ Python 3.9
### Installing with pip
DeepSpeed wheel packages can be downloaded from: [https://cancon.hpccube.com:65024/4/main/deepspeed/dtk23.04](https://cancon.hpccube.com:65024/4/main/deepspeed/dtk23.04)
Download the DeepSpeed wheel that matches your PyTorch and Python versions, then install it:
```shell
pip install deepspeed*  # the downloaded DeepSpeed wheel
```
### Installing from source
Before building, install the matching Python version, install the third-party dependencies, and configure the DTK environment variables (CentOS 7.x is used as the example below).
PyTorch wheel packages can be downloaded from: [https://cancon.hpccube.com:65024/4/main/pytorch/dtk23.04](https://cancon.hpccube.com:65024/4/main/pytorch/dtk23.04)
Download the PyTorch wheel that matches your Python version. If you are building against PyTorch 1.13, comment out (roughly) line 659 of op_builder/builder.py:
```bash
#sources[i] = str(src.relative_to(curr_file))
```
Install the dependencies:
```bash
# Add the repository that provides the third-party packages
yum install epel-release -y
# Install the required dependencies
yum install libffi-devel -y
yum -y install openssl openssl-devel
yum install -y libaio-devel
yum install -y libaio
# If your Python build is missing any of these modules, rebuild Python from source against the packages installed above, then reconfigure the Python environment
python3 -m pip install --upgrade pip setuptools
pip3 install wheel -i https://pypi.tuna.tsinghua.edu.cn/simple
pip3 install ninja -i https://pypi.tuna.tsinghua.edu.cn/simple
```
Download DTK and configure the environment variables:
```bash
# DTK tarball download directory: 光合社区 (Guanghe community) / Resources / DCU Toolkit / DTK23.04 (https://cancon.hpccube.com:65024/1/main/DTK-23.04). Choose the tarball for your system and extract it to /opt.
# If you are using a DTK version older than dtk23.04, modify the hipify files in torch as shown in the image below.
export ROCM_PATH=/opt/dtk-23.04
source /opt/dtk-23.04/env.sh
```
![logo](hipify_20230511113250.png)
Build DeepSpeed:
```bash
# Clone the source
git clone -b ds-v0.8.2-rocm http://developer.hpccube.com/codes/aicomponent/deepspeed.git
cd deepspeed
sh requirements/run_pip.sh
export DS_BUILD_VERSION=abix.dtkxxx
DS_BUILD_RANDOM_LTD=0 DS_BUILD_QUANTIZER=0 DS_BUILD_TRANSFORMER_INFERENCE=0 DS_BUILD_OPS=1 verbose=1 CXX=hipcc CC=hipcc python3 setup.py install bdist_wheel
```
Install DeepSpeed:
```bash
# The DeepSpeed wheel is generated in the dist folder
pip3 install ./dist/deepspeed*
```
## Note
+ If pip install is slow to download, add a mirror: -i https://pypi.tuna.tsinghua.edu.cn/simple/
+ DS_BUILD_VERSION sets the version number of the build; the resulting version string is 0.8.2+gitxxx.abix.dtkxxx, where
  gitxxx is filled in automatically from the source tree; abi0 means built with the devtools gcc and abi1 with a non-devtools gcc; dtkxxx is the DTK version number, e.g. dtk2304.
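After installation you can confirm which build you have; the check below is a minimal sketch, and the version string in the comment is illustrative only.

```python
# Print the installed DeepSpeed build; a DTK build should report something
# like 0.8.2+git<hash>.abi1.dtk2304 (illustrative).
import deepspeed

print(deepspeed.__version__)
```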
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
from .abstract_accelerator import DeepSpeedAccelerator
from .real_accelerator import get_accelerator, set_accelerator
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
import abc
from abc import ABC


class DeepSpeedAccelerator(ABC):
    def __init__(self):
        self._name = None
        self._communication_backend_name = None
......
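For reference, downstream code is expected to go through this accelerator interface rather than hard-coding `torch.cuda`. The following is a minimal, device-agnostic sketch; the method names follow the accelerator abstraction above, and `get_accelerator` is assumed to be importable from `deepspeed.accelerator` as shown in the `__init__.py` diff.

```python
# Device-agnostic tensor placement through the DeepSpeed accelerator abstraction.
import torch
from deepspeed.accelerator import get_accelerator

accel = get_accelerator()                        # CUDA_Accelerator on NVIDIA/ROCm builds
print(accel.device_name(), accel.communication_backend_name())   # e.g. "cuda", "nccl"

x = torch.ones(4, device=accel.device_name(0))   # e.g. "cuda:0", without hard-coding it
```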
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
import os
import pkgutil
...@@ -14,6 +17,7 @@ except ImportError:

class CUDA_Accelerator(DeepSpeedAccelerator):
    def __init__(self):
        self._name = 'cuda'
        self._communication_backend_name = 'nccl'
...@@ -22,13 +26,10 @@ class CUDA_Accelerator(DeepSpeedAccelerator):
        # put all valid class name <--> class type mapping into class_dict
        op_builder_dir = self.op_builder_dir()
        op_builder_module = importlib.import_module(op_builder_dir)
        for _, module_name, _ in pkgutil.iter_modules([os.path.dirname(op_builder_module.__file__)]):
            # avoid self references
            if module_name != 'all_ops' and module_name != 'builder':
                module = importlib.import_module("{}.{}".format(op_builder_dir, module_name))
                for member_name in module.__dir__():
                    if member_name.endswith(
                            'Builder'
...@@ -224,8 +225,9 @@ class CUDA_Accelerator(DeepSpeedAccelerator):
    def op_builder_dir(self):
        try:
            # is op_builder from deepspeed or a 3p version? this should only succeed if it's deepspeed
            # if successful this also means we're doing a local install and not JIT compile path
            from op_builder import __deepspeed__  # noqa: F401
            return "op_builder"
        except ImportError:
            return "deepspeed.ops.op_builder"
......
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
try:
    from accelerator.abstract_accelerator import DeepSpeedAccelerator as dsa1
...@@ -23,13 +26,8 @@ def _validate_accelerator(accel_obj):
    # accelerator.abstractor_accelerator
    # or deepspeed.accelerator.abstract_accelerator, consider accel_obj
    # is a conforming object
    if not ((dsa1 != None and isinstance(accel_obj, dsa1)) or (dsa2 != None and isinstance(accel_obj, dsa2))):
        raise AssertionError(f'{accel_obj.__class__.__name__} accelerator is not subclass of DeepSpeedAccelerator')

    # TODO: turn off is_available test since this breaks tests
    #assert accel_obj.is_available(), \
......
#!/bin/bash
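# Attach an interactive shell to a running DeepSpeed container (container name given as $1, default "deepspeed").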
name=${1-deepspeed}
docker exec -i -w /home/deepspeed -t $name /bin/bash
{
"num_vms": 2,
"location": "southcentralus",
"azure_sku": "Standard_NV6_Promo",
"ssh_private_key": "id_rsa",
"docker_ssh_port": 2222
}
#!/bin/bash
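# Run a command on the cluster VM with the given index: reads the SSH key and region from the config file
# (azure_config.json by default, override with -c), looks up the VM's public IP, and executes the remaining arguments over SSH.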
config_file=azure_config.json

while getopts 'c:' flag; do
    case "${flag}" in
        c) config_file="${OPTARG}" ;;
        *) echo "Unexpected option ${flag}"; exit 1 ;;
    esac
done
shift $(expr $OPTIND - 1)
echo "Using $config_file"

if [ ! -f ${config_file} ]; then
    echo "Cannot find $config_file"
    exit 1
fi

location=`cat ${config_file} | jq .location | sed 's/"//g'`
rg=deepspeed_rg_$location
nodeid=$1
cmds=${@:2}
echo $nodeid $cmds
ip_addr=`az vm list-ip-addresses -g $rg | jq .[${nodeid}].virtualMachine.network.publicIpAddresses[0].ipAddress | sed 's/"//g'`
ssh_private_key=`cat ${config_file} | jq .ssh_private_key | sed 's/"//g'`
if [ $ssh_private_key == "null" ]; then echo 'missing ssh_private_key in config'; exit 1; fi
ssh -i ${ssh_private_key} -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null deepspeed@${ip_addr} ${cmds}
#!/bin/bash
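# Build the DeepSpeed Docker image from the repository Dockerfile.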
docker build -t deepspeed:0.1 -f ../Dockerfile .
#!/bin/bash
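# Provision the Azure VMs described in azure_config.json: creates a resource group and one NGC GPU VM per node (requires the Azure CLI and jq).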
azure_config=azure_config.json
# Make sure jq is installed
command -v jq
if [ $? != 0 ]; then
echo "Missing dependency of jq, please 'apt-get install jq'"
exit 1
fi
if [ ! -f ${azure_config} ]; then
echo "Cannot find $azure_config"
exit 1
fi
cat $azure_config
num_vms=`cat ${azure_config} | jq .num_vms`
if [ $num_vms == "null" ]; then echo 'missing num_vms in config'; exit 1; fi
location=`cat ${azure_config} | jq .location | sed 's/"//g'`
if [ $location == "null" ]; then echo 'missing location in config'; exit 1; fi
azure_sku=`cat ${azure_config} | jq .azure_sku | sed 's/"//g'`
if [ $azure_sku == "null" ]; then echo 'missing azure_sku in config'; exit 1; fi
ssh_private_key=`cat ${azure_config} | jq .ssh_private_key | sed 's/"//g'`
if [ $ssh_private_key == "null" ]; then echo 'missing ssh_private_key in config'; exit 1; fi
ssh_key=${ssh_private_key}.pub
if [ ! -f ${ssh_private_key} ]; then
echo "Cannot find $ssh_private_key"
exit 1
fi
if [ ! -f ${ssh_key} ]; then
echo "Cannot find $ssh_key"
exit 1
fi
resource_group=deepspeed_rg_$location
az group create --name ${resource_group} --location $location
base_vm_name=deepspeed
vm_image="nvidia:ngc_azure_17_11:ngc_gpu_cloud_19_11_3:19.11.3"
az vm image terms accept --urn ${vm_image}
for i in `seq 0 $(( num_vms - 1))`; do
    vm_name=${base_vm_name}_$i
    echo "creating $vm_name"
    az vm create \
        --resource-group ${resource_group} \
        --name ${vm_name} \
        --image ${vm_image} \
        --admin-username deepspeed \
        --size ${azure_sku} \
        --ssh-key-values ${ssh_key}
done
#!/bin/bash
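# Update every VM in the cluster: pull the latest DeepSpeed Docker image, refresh the DeepSpeed checkout,
# and restart the container (in parallel via pdsh when available, sequentially otherwise).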
azure_config=azure_config.json
if [ ! -f ${azure_config} ]; then
echo "Cannot find $azure_config"
exit 1
fi
location=`cat ${azure_config} | jq .location | sed 's/"//g'`
rg=deepspeed_rg_$location
parallel=true
command -v pdsh
if [ $? != 0 ]; then
echo "Installing pdsh will allow for the docker pull to be done in parallel across the cluster. See: 'apt-get install pdsh'"
parallel=false
fi
ssh_key=`cat ${azure_config} | jq .ssh_private_key | sed 's/"//g'`
if [ $ssh_key == "null" ]; then echo 'missing ssh_private_key in config'; exit 1; fi
num_vms=`cat ${azure_config} | jq .num_vms`
if [ $num_vms == "null" ]; then echo 'missing num_vms in config'; exit 1; fi
args="-i ${ssh_key} -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
username=deepspeed
update_script="
docker pull deepspeed/deepspeed:latest;
ln -s workdir/DeepSpeed/azure/attach.sh attach.sh;
cd workdir/DeepSpeed;
git pull;
git submodule update --init --recursive;
bash azure/start_container.sh;
"
if [ $parallel == true ]; then
    echo "parallel docker pull"
    hosts=""
    for node_id in `seq 0 $((num_vms - 1))`; do
        addr=`az vm list-ip-addresses -g $rg | jq .[${node_id}].virtualMachine.network.publicIpAddresses[0].ipAddress | sed 's/"//g'`
        hosts="${addr},${hosts}"
    done
    PDSH_RCMD_TYPE=ssh PDSH_SSH_ARGS_APPEND=${args} pdsh -w $hosts -l ${username} $update_script
else
    echo "sequential docker pull"
    for node_id in `seq 0 $((num_vms - 1))`; do
        ip_addr=`az vm list-ip-addresses -g $rg | jq .[${node_id}].virtualMachine.network.publicIpAddresses[0].ipAddress | sed 's/"//g'`
        addr=${username}@${ip_addr}
        ssh ${args} $addr $update_script
    done
fi
#!/bin/bash
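# Generate a hostfile and SSH config for the cluster, clone DeepSpeed into workdir on each VM,
# and copy the SSH keys, SSH config, and hostfile to every node.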
azure_config=azure_config.json
if [ ! -f ${azure_config} ]; then
echo "Cannot find $azure_config"
exit 1
fi
location=`cat ${azure_config} | jq .location | sed 's/"//g'`
rg=deepspeed_rg_$location
ssh_key=`cat ${azure_config} | jq .ssh_private_key | sed 's/"//g'`
if [ $ssh_key == "null" ]; then echo 'missing ssh_private_key in config'; exit 1; fi
docker_ssh_port=`cat ${azure_config} | jq .docker_ssh_port`
if [ $docker_ssh_port == "null" ]; then echo 'missing docker_ssh_port in config'; exit 1; fi
username=deepspeed
args="-i ${ssh_key} -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
num_vms=`az vm list -g $rg | jq '. | length'`
first_ip_addr=`az vm list-ip-addresses -g $rg | jq .[0].virtualMachine.network.publicIpAddresses[0].ipAddress | sed 's/"//g'`
num_slots=`ssh $args ${username}@${first_ip_addr} 'nvidia-smi -L | wc -l'`
echo "number of slots per vm: $num_slots"
hostfile=hostfile
ssh_config=config
echo -n "" > $hostfile
echo -n "" > $ssh_config
for node_id in `seq 0 $((num_vms - 1))`; do
    private_ip_addr=`az vm list-ip-addresses -g $rg | jq .[${node_id}].virtualMachine.network.privateIpAddresses[0] | sed 's/"//g'`
    echo "worker-${node_id} slots=${num_slots}" >> ${hostfile}
    echo "Host worker-${node_id}
  HostName ${private_ip_addr}
  Port ${docker_ssh_port}
  StrictHostKeyChecking no
" >> ${ssh_config}
done
update_script="
sudo mkdir -p /job;
sudo chmod -R 777 /job;
mkdir -p workdir;
git clone https://github.com/microsoft/DeepSpeed.git workdir/DeepSpeed;
"
for node_id in `seq 0 $((num_vms - 1))`; do
    ip_addr=`az vm list-ip-addresses -g $rg | jq .[${node_id}].virtualMachine.network.publicIpAddresses[0].ipAddress | sed 's/"//g'`
    addr=${username}@${ip_addr}
    echo "copying ssh keys, ssh config, hostfile to worker-${node_id}"
    ssh $args ${addr} $update_script
    scp $args ${ssh_key}* ${addr}:.ssh/
    scp $args ${ssh_config} ${addr}:.ssh/
    scp $args ${hostfile} ${addr}:/job/
done
rm $hostfile $ssh_config
#!/bin/bash
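# Deallocate all cluster VMs (default) or delete them entirely when run with -d.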
azure_config=azure_config.json
if [ ! -f ${azure_config} ]; then
echo "Cannot find $azure_config"
exit 1
fi
delete=0
while getopts 'd' flag; do
    case "${flag}" in
        d) delete=1 ;;
        *)
            echo "Unexpected option ${flag}"
            exit 1
            ;;
    esac
done
num_vms=`cat ${azure_config} | jq .num_vms`
if [ $num_vms == "null" ]; then echo 'missing num_vms in config'; exit 1; fi
location=`cat ${azure_config} | jq .location | sed 's/"//g'`
if [ $location == "null" ]; then echo 'missing location in config'; exit 1; fi
base_vm_name=deepspeed
resource_group=deepspeed_rg_$location
for i in `seq 0 $(( num_vms - 1))`; do
    vm_name=${base_vm_name}_$i
    if [ $delete == 0 ]; then
        echo "deallocating $vm_name"
        az vm deallocate --resource-group $resource_group --name $vm_name --no-wait
    else
        echo "deleting $vm_name"
        az vm delete -y --resource-group $resource_group --name $vm_name --no-wait
    fi
done
#!/bin/bash
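# Start the DeepSpeed container with host networking, all GPUs, and the workdir/.ssh/hostfile mounts, then launch sshd inside it.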
name=${1-deepspeed}
image=deepspeed/deepspeed:latest
echo "starting docker image named $name"
docker run -d -t --name $name \
    --network host \
    -v ${HOME}/workdir:/home/deepspeed/workdir \
    -v ${HOME}/.ssh:/home/deepspeed/.ssh \
    -v /job/hostfile:/job/hostfile \
    --gpus all $image bash -c 'sudo service ssh start && sleep infinity'
# DeepSpeed Benchmarks
If you are looking for DeepSpeed benchmarks, please see the following resources:
1. [Communication Benchmarking Suite](https://github.com/microsoft/DeepSpeedExamples/tree/master/benchmarks/communication)
2. [Inference Benchmarks](https://github.com/microsoft/DeepSpeedExamples/tree/master/benchmarks/inference)
'''Copyright The Microsoft DeepSpeed Team'''