Commit 9e768b59 authored by zhuwenwen's avatar zhuwenwen
Browse files
parents 7bc5a8e3 8aed02b9
...@@ -4,11 +4,10 @@ on: ...@@ -4,11 +4,10 @@ on:
pull_request: pull_request:
types: [synchronize, opened, reopened] types: [synchronize, opened, reopened]
paths: paths:
- 'applications/Chat/coati/**' - "applications/Chat/coati/**"
- 'applications/Chat/requirements.txt' - "applications/Chat/requirements.txt"
- 'applications/Chat/setup.py' - "applications/Chat/setup.py"
- 'applications/Chat/examples/**' - "applications/Chat/examples/**"
jobs: jobs:
tests: tests:
...@@ -20,7 +19,7 @@ jobs: ...@@ -20,7 +19,7 @@ jobs:
runs-on: [self-hosted, gpu] runs-on: [self-hosted, gpu]
container: container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0 image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
options: --gpus all --rm -v /data/scratch/github_actions/chat:/data/scratch/github_actions/chat options: --gpus all --rm -v /data/scratch/github_actions/chat:/data/scratch/github_actions/chat --shm-size=10.24gb
timeout-minutes: 30 timeout-minutes: 30
defaults: defaults:
run: run:
...@@ -29,28 +28,26 @@ jobs: ...@@ -29,28 +28,26 @@ jobs:
- name: Checkout ColossalAI - name: Checkout ColossalAI
uses: actions/checkout@v2 uses: actions/checkout@v2
- name: Install ColossalAI and ChatGPT - name: Install ChatGPT
run: | run: |
pip install -e .
cd applications/Chat cd applications/Chat
pip install -v . pip install -v .
pip install -r examples/requirements.txt pip install -r examples/requirements.txt
- name: Install Transformers - name: Install Transformers
run: | run: |
cd applications/Chat pip install transformers==4.30.2
git clone https://github.com/hpcaitech/transformers
cd transformers
pip install -v .
- name: Execute Examples - name: Execute Examples
run: | run: |
cd applications/Chat cd applications/Chat
rm -rf ~/.cache/colossalai rm -rf ~/.cache/colossalai
./examples/test_ci.sh ./tests/test_inference.sh
./tests/test_benchmarks.sh
./tests/test_train.sh
env: env:
NCCL_SHM_DISABLE: 1 NCCL_SHM_DISABLE: 1
MAX_JOBS: 8 MAX_JOBS: 8
SFT_DATASET: /data/scratch/github_actions/chat/data.json SFT_DATASET: /data/scratch/github_actions/chat/data.json
PROMPT_PATH: /data/scratch/github_actions/chat/prompts_en.jsonl PROMPT_DATASET: /data/scratch/github_actions/chat/prompts_en.jsonl
PRETRAIN_DATASET: /data/scratch/github_actions/chat/alpaca_data.json PRETRAIN_DATASET: /data/scratch/github_actions/chat/alpaca_data.json
...@@ -30,9 +30,8 @@ jobs: ...@@ -30,9 +30,8 @@ jobs:
- name: Checkout ColossalAI - name: Checkout ColossalAI
uses: actions/checkout@v2 uses: actions/checkout@v2
- name: Install ColossalAI and ChatGPT - name: Install ChatGPT
run: | run: |
pip install -e .
cd applications/Chat cd applications/Chat
pip install -v . pip install -v .
pip install -r requirements-test.txt pip install -r requirements-test.txt
......
...@@ -22,13 +22,13 @@ def compare_dirs(dir1, dir2): ...@@ -22,13 +22,13 @@ def compare_dirs(dir1, dir2):
# If the corresponding item doesn't exist in the second directory, the directories are different # If the corresponding item doesn't exist in the second directory, the directories are different
if not os.path.exists(item_path2): if not os.path.exists(item_path2):
print(f'Found mismatch: {item_path1}, {item_path2}') print(f"Found mismatch: {item_path1}, {item_path2}")
return False return False
# If the corresponding item is a directory, we compare the two directories recursively # If the corresponding item is a directory, we compare the two directories recursively
if os.path.isdir(item_path1) and os.path.isdir(item_path2): if os.path.isdir(item_path1) and os.path.isdir(item_path2):
if not compare_dirs(item_path1, item_path2): if not compare_dirs(item_path1, item_path2):
print(f'Found mismatch: {item_path1}, {item_path2}') print(f"Found mismatch: {item_path1}, {item_path2}")
return False return False
# both are files # both are files
...@@ -37,16 +37,16 @@ def compare_dirs(dir1, dir2): ...@@ -37,16 +37,16 @@ def compare_dirs(dir1, dir2):
# If the corresponding item is not a file or a directory, the directories are different # If the corresponding item is not a file or a directory, the directories are different
else: else:
print(f'Found mismatch: {item_path1}, {item_path2}') print(f"Found mismatch: {item_path1}, {item_path2}")
return False return False
# If all items are the same, the directories are the same # If all items are the same, the directories are the same
return True return True
if __name__ == '__main__': if __name__ == "__main__":
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('-d', '--directory', help="The directory where the multi-language source files are kept.") parser.add_argument("-d", "--directory", help="The directory where the multi-language source files are kept.")
args = parser.parse_args() args = parser.parse_args()
i18n_folders = os.listdir(args.directory) i18n_folders = os.listdir(args.directory)
...@@ -56,7 +56,7 @@ if __name__ == '__main__': ...@@ -56,7 +56,7 @@ if __name__ == '__main__':
for i in range(1, len(i18n_folders)): for i in range(1, len(i18n_folders)):
dir1 = i18n_folders[0] dir1 = i18n_folders[0]
dir2 = i18n_folders[i] dir2 = i18n_folders[i]
print(f'comparing {dir1} vs {dir2}') print(f"comparing {dir1} vs {dir2}")
match = compare_dirs(i18n_folders[0], i18n_folders[i]) match = compare_dirs(i18n_folders[0], i18n_folders[i])
if not match: if not match:
......
...@@ -4,7 +4,7 @@ import os ...@@ -4,7 +4,7 @@ import os
def check_inputs(input_list): def check_inputs(input_list):
for path in input_list: for path in input_list:
real_path = os.path.join('examples', path) real_path = os.path.join("examples", path)
if not os.path.exists(real_path): if not os.path.exists(real_path):
return False return False
return True return True
...@@ -12,16 +12,16 @@ def check_inputs(input_list): ...@@ -12,16 +12,16 @@ def check_inputs(input_list):
def main(): def main():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('-f', '--fileNameList', type=str, help="List of file names") parser.add_argument("-f", "--fileNameList", type=str, help="List of file names")
args = parser.parse_args() args = parser.parse_args()
name_list = args.fileNameList.split(",") name_list = args.fileNameList.split(",")
is_correct = check_inputs(name_list) is_correct = check_inputs(name_list)
if is_correct: if is_correct:
print('success') print("success")
else: else:
print('failure') print("failure")
if __name__ == '__main__': if __name__ == "__main__":
main() main()
...@@ -17,21 +17,21 @@ def show_files(path, all_files): ...@@ -17,21 +17,21 @@ def show_files(path, all_files):
def join(input_list, sep=None): def join(input_list, sep=None):
return (sep or ' ').join(input_list) return (sep or " ").join(input_list)
def main(): def main():
contents = show_files('examples/', []) contents = show_files("examples/", [])
all_loc = [] all_loc = []
for file_loc in contents: for file_loc in contents:
split_loc = file_loc.split('/') split_loc = file_loc.split("/")
# must have two sub-folder levels after examples folder, such as examples/images/vit is acceptable, examples/images/README.md is not, examples/requirements.txt is not. # must have two sub-folder levels after examples folder, such as examples/images/vit is acceptable, examples/images/README.md is not, examples/requirements.txt is not.
if len(split_loc) >= 4: if len(split_loc) >= 4:
re_loc = '/'.join(split_loc[1:3]) re_loc = "/".join(split_loc[1:3])
if re_loc not in all_loc: if re_loc not in all_loc:
all_loc.append(re_loc) all_loc.append(re_loc)
print(all_loc) print(all_loc)
if __name__ == '__main__': if __name__ == "__main__":
main() main()
...@@ -3,7 +3,7 @@ import argparse ...@@ -3,7 +3,7 @@ import argparse
def main(): def main():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('-f', '--fileNameList', type=str, help="The list of changed files") parser.add_argument("-f", "--fileNameList", type=str, help="The list of changed files")
args = parser.parse_args() args = parser.parse_args()
name_list = args.fileNameList.split(":") name_list = args.fileNameList.split(":")
folder_need_check = set() folder_need_check = set()
...@@ -15,10 +15,10 @@ def main(): ...@@ -15,10 +15,10 @@ def main():
# - application # - application
# - file # - file
if loc.split("/")[0] == "examples" and len(loc.split("/")) >= 4: if loc.split("/")[0] == "examples" and len(loc.split("/")) >= 4:
folder_need_check.add('/'.join(loc.split("/")[1:3])) folder_need_check.add("/".join(loc.split("/")[1:3]))
# Output the result using print. Then the shell can get the values. # Output the result using print. Then the shell can get the values.
print(list(folder_need_check)) print(list(folder_need_check))
if __name__ == '__main__': if __name__ == "__main__":
main() main()
...@@ -7,27 +7,27 @@ import re ...@@ -7,27 +7,27 @@ import re
import requests import requests
COMMIT_API = 'https://api.github.com/repos/hpcaitech/ColossalAI/commits' COMMIT_API = "https://api.github.com/repos/hpcaitech/ColossalAI/commits"
TAGS_API = 'https://api.github.com/repos/hpcaitech/ColossalAI/tags' TAGS_API = "https://api.github.com/repos/hpcaitech/ColossalAI/tags"
def parse_args(): def parse_args():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--out', type=str, help='output path for the release draft', required=True) parser.add_argument("--out", type=str, help="output path for the release draft", required=True)
parser.add_argument('--version', type=str, help='current version to release', required=True) parser.add_argument("--version", type=str, help="current version to release", required=True)
return parser.parse_args() return parser.parse_args()
def get_latest_tag_commit(headers=None): def get_latest_tag_commit(headers=None):
res = requests.get(url=TAGS_API, headers=headers) res = requests.get(url=TAGS_API, headers=headers)
data = res.json() data = res.json()
commit_hash = data[0]['commit']['sha'] commit_hash = data[0]["commit"]["sha"]
version = data[0]['name'] version = data[0]["name"]
return commit_hash, version return commit_hash, version
def get_commit_info(commit_hash, headers=None): def get_commit_info(commit_hash, headers=None):
api = f'{COMMIT_API}/{commit_hash}' api = f"{COMMIT_API}/{commit_hash}"
res = requests.get(url=api, headers=headers) res = requests.get(url=api, headers=headers)
return res.json() return res.json()
...@@ -37,7 +37,7 @@ def get_all_commit_info(since, headers=None): ...@@ -37,7 +37,7 @@ def get_all_commit_info(since, headers=None):
results = [] results = []
while True: while True:
api = f'{COMMIT_API}?since={since}&per_page=100&page={page}' api = f"{COMMIT_API}?since={since}&per_page=100&page={page}"
resp = requests.get(url=api, headers=headers) resp = requests.get(url=api, headers=headers)
data = resp.json() data = resp.json()
...@@ -53,21 +53,21 @@ def get_all_commit_info(since, headers=None): ...@@ -53,21 +53,21 @@ def get_all_commit_info(since, headers=None):
def collate_release_info(commit_info_list): def collate_release_info(commit_info_list):
results = dict() results = dict()
pattern = pattern = r'\[.*\]' pattern = pattern = r"\[.*\]"
for commit_info in commit_info_list: for commit_info in commit_info_list:
author = commit_info['commit']['author']['name'] author = commit_info["commit"]["author"]["name"]
try: try:
author_url = commit_info['author']['url'] author_url = commit_info["author"]["url"]
except: except:
# author can be None # author can be None
author_url = None author_url = None
msg = commit_info['commit']['message'] msg = commit_info["commit"]["message"]
match = re.search(pattern, msg) match = re.search(pattern, msg)
if match: if match:
tag = match.group().lstrip('[').rstrip(']').capitalize() tag = match.group().lstrip("[").rstrip("]").capitalize()
if tag not in results: if tag not in results:
results[tag] = [] results[tag] = []
results[tag].append((msg, author, author_url)) results[tag].append((msg, author, author_url))
...@@ -89,42 +89,43 @@ def generate_release_post_markdown(current_version, last_version, release_info): ...@@ -89,42 +89,43 @@ def generate_release_post_markdown(current_version, last_version, release_info):
for msg, author, author_url in v: for msg, author, author_url in v:
# only keep the first line # only keep the first line
msg = msg.split('\n')[0] msg = msg.split("\n")[0]
if author_url: if author_url:
item = f'{msg} by [{author}]({author_url})\n' item = f"{msg} by [{author}]({author_url})\n"
else: else:
item = f'{msg} by {author}\n' item = f"{msg} by {author}\n"
text.append(f'- {item}') text.append(f"- {item}")
text.append('\n') text.append("\n")
# add full change log # add full change log
text.append( text.append(
f'**Full Changelog**: https://github.com/hpcaitech/ColossalAI/compare/{current_version}...{last_version}') f"**Full Changelog**: https://github.com/hpcaitech/ColossalAI/compare/{current_version}...{last_version}"
)
return text return text
if __name__ == '__main__': if __name__ == "__main__":
args = parse_args() args = parse_args()
token = os.environ['GITHUB_API_TOKEN'] token = os.environ["GITHUB_API_TOKEN"]
headers = {'Authorization': token} headers = {"Authorization": token}
# get previous release tag # get previous release tag
last_release_commit, last_version = get_latest_tag_commit(headers) last_release_commit, last_version = get_latest_tag_commit(headers)
last_release_commit_info = get_commit_info(last_release_commit, headers=headers) last_release_commit_info = get_commit_info(last_release_commit, headers=headers)
last_release_date = last_release_commit_info['commit']['author']['date'] last_release_date = last_release_commit_info["commit"]["author"]["date"]
# get the commits since last release # get the commits since last release
commit_info = get_all_commit_info(since=last_release_date, headers=headers) commit_info = get_all_commit_info(since=last_release_date, headers=headers)
commit_info = commit_info[:-1] # remove the release commit commit_info = commit_info[:-1] # remove the release commit
# collate into markdown # collate into markdown
release_info = collate_release_info(commit_info) release_info = collate_release_info(commit_info)
markdown_text = generate_release_post_markdown(args.version, last_version, release_info) markdown_text = generate_release_post_markdown(args.version, last_version, release_info)
# write into a file # write into a file
with open(args.out, 'w') as f: with open(args.out, "w") as f:
for line in markdown_text: for line in markdown_text:
f.write(line) f.write(line)
...@@ -5,8 +5,8 @@ import requests ...@@ -5,8 +5,8 @@ import requests
def parse_args(): def parse_args():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('-m', '--message', type=str) parser.add_argument("-m", "--message", type=str)
parser.add_argument('-u', '--url', type=str) parser.add_argument("-u", "--url", type=str)
return parser.parse_args() return parser.parse_args()
...@@ -15,6 +15,6 @@ def send_message_to_lark(message, webhook_url): ...@@ -15,6 +15,6 @@ def send_message_to_lark(message, webhook_url):
requests.post(webhook_url, json=data) requests.post(webhook_url, json=data)
if __name__ == '__main__': if __name__ == "__main__":
args = parse_args() args = parse_args()
send_message_to_lark(args.message, args.url) send_message_to_lark(args.message, args.url)
...@@ -155,3 +155,7 @@ colossalai/version.py ...@@ -155,3 +155,7 @@ colossalai/version.py
# ignore coverage test file # ignore coverage test file
coverage.lcov coverage.lcov
coverage.xml coverage.xml
# ignore testmon and coverage files
.coverage
.testmondata*
...@@ -3,3 +3,5 @@ line_length = 120 ...@@ -3,3 +3,5 @@ line_length = 120
multi_line_output=3 multi_line_output=3
include_trailing_comma = true include_trailing_comma = true
ignore_comments = true ignore_comments = true
profile = black
honor_noqa = true
repos: repos:
- repo: https://github.com/PyCQA/autoflake
rev: v2.2.1
hooks:
- id: autoflake
name: autoflake (python)
args: ['--in-place', '--remove-unused-variables', '--remove-all-unused-imports', '--ignore-init-module-imports']
- repo: https://github.com/pycqa/isort - repo: https://github.com/pycqa/isort
rev: 5.12.0 rev: 5.12.0
hooks: hooks:
- id: isort - id: isort
name: sort all imports (python) name: sort all imports (python)
- repo: https://github.com/pre-commit/mirrors-yapf - repo: https://github.com/psf/black-pre-commit-mirror
rev: v0.32.0 rev: 23.9.1
hooks: hooks:
- id: yapf - id: black
name: yapf formatter name: black formatter
args: ['--style=.style.yapf', '--parallel', '--in-place'] args: ['--line-length=120', '--target-version=py37', '--target-version=py38', '--target-version=py39','--target-version=py310']
- repo: https://github.com/pre-commit/mirrors-clang-format - repo: https://github.com/pre-commit/mirrors-clang-format
rev: v13.0.1 rev: v13.0.1
hooks: hooks:
- id: clang-format - id: clang-format
name: clang formatter name: clang formatter
types_or: [c++, c]
- repo: https://github.com/pre-commit/pre-commit-hooks - repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0 rev: v4.3.0
......
[style]
based_on_style = google
spaces_before_comment = 4
split_before_logical_operator = true
column_limit = 120
...@@ -30,6 +30,12 @@ pip install <options> -e . ...@@ -30,6 +30,12 @@ pip install <options> -e .
### Unit Tests ### Unit Tests
We use [PyTest](https://docs.pytest.org/en/latest/) to execute tests. You can install pytest by `pip install pytest`. As some of the tests require initialization of the distributed backend, GPUs are needed to execute these tests. We use [PyTest](https://docs.pytest.org/en/latest/) to execute tests. You can install pytest by `pip install pytest`. As some of the tests require initialization of the distributed backend, GPUs are needed to execute these tests.
To set up the environment for unit testing, first change your current directory to the root directory of your local ColossalAI repository, then run
```bash
pip install -r requirements/requirements-test.txt
```
If you encounter an error telling "Could not find a version that satisfies the requirement fbgemm-gpu==0.2.0", please downgrade your python version to 3.8 or 3.9 and try again.
If you only want to run CPU tests, you can run If you only want to run CPU tests, you can run
```bash ```bash
...@@ -138,4 +144,4 @@ You can now create a pull request on the GitHub webpage of your repository. The ...@@ -138,4 +144,4 @@ You can now create a pull request on the GitHub webpage of your repository. The
Do write clearly the description of your pull request and [link the pull request to your target issue](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue). This will automatically close the issue when the pull request is approved. Do write clearly the description of your pull request and [link the pull request to your target issue](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue). This will automatically close the issue when the pull request is approved.
In case of code conflict, you should rebase your branch and resolve the conflicts manually. In case of code conflict, you should rebase your branch and resolve the conflicts manually.
\ No newline at end of file
...@@ -396,3 +396,84 @@ Copyright 2021- HPC-AI Technology Inc. All rights reserved. ...@@ -396,3 +396,84 @@ Copyright 2021- HPC-AI Technology Inc. All rights reserved.
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE. POSSIBILITY OF SUCH DAMAGE.
---------------- LICENSE FOR VLLM TEAM ----------------
from VLLM TEAM:
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://github.com/vllm-project/vllm/blob/main/LICENSE
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
---------------- LICENSE FOR LIGHTLLM TEAM ----------------
from LIGHTLLM TEAM:
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://github.com/ModelTC/lightllm/blob/main/LICENSE
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
---------------- LICENSE FOR AutoGPTQ ----------------
From AutoGPTQ:
MIT License
Copyright (c) 2023 潘其威(William)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
---------------- LICENSE FOR exllama ----------------
From exllama:
MIT License
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
[![Documentation](https://readthedocs.org/projects/colossalai/badge/?version=latest)](https://colossalai.readthedocs.io/en/latest/?badge=latest) [![Documentation](https://readthedocs.org/projects/colossalai/badge/?version=latest)](https://colossalai.readthedocs.io/en/latest/?badge=latest)
[![CodeFactor](https://www.codefactor.io/repository/github/hpcaitech/colossalai/badge)](https://www.codefactor.io/repository/github/hpcaitech/colossalai) [![CodeFactor](https://www.codefactor.io/repository/github/hpcaitech/colossalai/badge)](https://www.codefactor.io/repository/github/hpcaitech/colossalai)
[![HuggingFace badge](https://img.shields.io/badge/%F0%9F%A4%97HuggingFace-Join-yellow)](https://huggingface.co/hpcai-tech) [![HuggingFace badge](https://img.shields.io/badge/%F0%9F%A4%97HuggingFace-Join-yellow)](https://huggingface.co/hpcai-tech)
[![slack badge](https://img.shields.io/badge/Slack-join-blueviolet?logo=slack&amp)](https://join.slack.com/t/colossalaiworkspace/shared_invite/zt-z7b26eeb-CBp7jouvu~r0~lcFzX832w) [![slack badge](https://img.shields.io/badge/Slack-join-blueviolet?logo=slack&amp)](https://github.com/hpcaitech/public_assets/tree/main/colossalai/contact/slack)
[![WeChat badge](https://img.shields.io/badge/微信-加入-green?logo=wechat&amp)](https://raw.githubusercontent.com/hpcaitech/public_assets/main/colossalai/img/WeChat.png) [![WeChat badge](https://img.shields.io/badge/微信-加入-green?logo=wechat&amp)](https://raw.githubusercontent.com/hpcaitech/public_assets/main/colossalai/img/WeChat.png)
...@@ -25,14 +25,15 @@ ...@@ -25,14 +25,15 @@
</div> </div>
## Latest News ## Latest News
* [2023/09] [One Half-Day of Training Using a Few Hundred Dollars Yields Similar Results to Mainstream Large Models, Open-Source and Commercial-Free Domain-Specific Llm Solution](https://www.hpc-ai.tech/blog/one-half-day-of-training-using-a-few-hundred-dollars-yields-similar-results-to-mainstream-large-models-open-source-and-commercial-free-domain-specific-llm-solution)
* [2023/09] [70 Billion Parameter LLaMA2 Model Training Accelerated by 195%](https://www.hpc-ai.tech/blog/70b-llama2-training)
* [2023/07] [HPC-AI Tech Raises 22 Million USD in Series A Funding](https://www.hpc-ai.tech/blog/hpc-ai-tech-raises-22-million-usd-in-series-a-funding-to-fuel-team-expansion-and-business-growth)
* [2023/07] [65B Model Pretraining Accelerated by 38%, Best Practices for Building LLaMA-Like Base Models Open-Source](https://www.hpc-ai.tech/blog/large-model-pretraining)
* [2023/03] [ColossalChat: An Open-Source Solution for Cloning ChatGPT With a Complete RLHF Pipeline](https://medium.com/@yangyou_berkeley/colossalchat-an-open-source-solution-for-cloning-chatgpt-with-a-complete-rlhf-pipeline-5edf08fb538b) * [2023/03] [ColossalChat: An Open-Source Solution for Cloning ChatGPT With a Complete RLHF Pipeline](https://medium.com/@yangyou_berkeley/colossalchat-an-open-source-solution-for-cloning-chatgpt-with-a-complete-rlhf-pipeline-5edf08fb538b)
* [2023/03] [Intel and Colossal-AI Partner to Deliver Cost-Efficient Open-Source Solution for Protein Folding Structure Prediction](https://www.hpc-ai.tech/blog/intel-habana) * [2023/03] [Intel and Colossal-AI Partner to Deliver Cost-Efficient Open-Source Solution for Protein Folding Structure Prediction](https://www.hpc-ai.tech/blog/intel-habana)
* [2023/03] [AWS and Google Fund Colossal-AI with Startup Cloud Programs](https://www.hpc-ai.tech/blog/aws-and-google-fund-colossal-ai-with-startup-cloud-programs) * [2023/03] [AWS and Google Fund Colossal-AI with Startup Cloud Programs](https://www.hpc-ai.tech/blog/aws-and-google-fund-colossal-ai-with-startup-cloud-programs)
* [2023/02] [Open Source Solution Replicates ChatGPT Training Process! Ready to go with only 1.6GB GPU Memory](https://www.hpc-ai.tech/blog/colossal-ai-chatgpt) * [2023/02] [Open Source Solution Replicates ChatGPT Training Process! Ready to go with only 1.6GB GPU Memory](https://www.hpc-ai.tech/blog/colossal-ai-chatgpt)
* [2023/01] [Hardware Savings Up to 46 Times for AIGC and Automatic Parallelism](https://medium.com/pytorch/latest-colossal-ai-boasts-novel-automatic-parallelism-and-offers-savings-up-to-46x-for-stable-1453b48f3f02) * [2023/01] [Hardware Savings Up to 46 Times for AIGC and Automatic Parallelism](https://medium.com/pytorch/latest-colossal-ai-boasts-novel-automatic-parallelism-and-offers-savings-up-to-46x-for-stable-1453b48f3f02)
* [2022/11] [Diffusion Pretraining and Hardware Fine-Tuning Can Be Almost 7X Cheaper](https://www.hpc-ai.tech/blog/diffusion-pretraining-and-hardware-fine-tuning-can-be-almost-7x-cheaper)
* [2022/10] [Use a Laptop to Analyze 90% of Proteins, With a Single-GPU Inference Sequence Exceeding 10,000](https://www.hpc-ai.tech/blog/use-a-laptop-to-analyze-90-of-proteins-with-a-single-gpu-inference-sequence-exceeding)
* [2022/09] [HPC-AI Tech Completes $6 Million Seed and Angel Round Fundraising](https://www.hpc-ai.tech/blog/hpc-ai-tech-completes-6-million-seed-and-angel-round-fundraising-led-by-bluerun-ventures-in-the)
## Table of Contents ## Table of Contents
<ul> <ul>
...@@ -41,6 +42,7 @@ ...@@ -41,6 +42,7 @@
<li> <li>
<a href="#Colossal-AI-in-the-Real-World">Colossal-AI for Real World Applications</a> <a href="#Colossal-AI-in-the-Real-World">Colossal-AI for Real World Applications</a>
<ul> <ul>
<li><a href="#Colossal-LLaMA-2">Colossal-LLaMA-2: One Half-Day of Training Using a Few Hundred Dollars Yields Similar Results to Mainstream Large Models, Open-Source and Commercial-Free Domain-Specific Llm Solution</a></li>
<li><a href="#ColossalChat">ColossalChat: An Open-Source Solution for Cloning ChatGPT With a Complete RLHF Pipeline</a></li> <li><a href="#ColossalChat">ColossalChat: An Open-Source Solution for Cloning ChatGPT With a Complete RLHF Pipeline</a></li>
<li><a href="#AIGC">AIGC: Acceleration of Stable Diffusion</a></li> <li><a href="#AIGC">AIGC: Acceleration of Stable Diffusion</a></li>
<li><a href="#Biomedicine">Biomedicine: Acceleration of AlphaFold Protein Structure</a></li> <li><a href="#Biomedicine">Biomedicine: Acceleration of AlphaFold Protein Structure</a></li>
...@@ -49,6 +51,7 @@ ...@@ -49,6 +51,7 @@
<li> <li>
<a href="#Parallel-Training-Demo">Parallel Training Demo</a> <a href="#Parallel-Training-Demo">Parallel Training Demo</a>
<ul> <ul>
<li><a href="#LLaMA2">LLaMA 1/2</a></li>
<li><a href="#GPT-3">GPT-3</a></li> <li><a href="#GPT-3">GPT-3</a></li>
<li><a href="#GPT-2">GPT-2</a></li> <li><a href="#GPT-2">GPT-2</a></li>
<li><a href="#BERT">BERT</a></li> <li><a href="#BERT">BERT</a></li>
...@@ -124,15 +127,55 @@ distributed training and inference in a few lines. ...@@ -124,15 +127,55 @@ distributed training and inference in a few lines.
## Colossal-AI in the Real World ## Colossal-AI in the Real World
### Colossal-LLaMA-2
- One half-day of training using a few hundred dollars yields similar results to mainstream large models, open-source and commercial-free domain-specific LLM solution.
[[code]](https://github.com/hpcaitech/ColossalAI/tree/main/applications/Colossal-LLaMA-2)
[[blog]](https://www.hpc-ai.tech/blog/one-half-day-of-training-using-a-few-hundred-dollars-yields-similar-results-to-mainstream-large-models-open-source-and-commercial-free-domain-specific-llm-solution)
[[model weights]](https://huggingface.co/hpcai-tech/Colossal-LLaMA-2-7b-base)
| | Backbone | Tokens Consumed | | MMLU | CMMLU | AGIEval | GAOKAO | CEval |
| :----------------------------: | :--------: | :-------------: | :------------------: | :-----------: | :-----: | :----: | :----: | :------------------------------: |
| | | - | | 5-shot | 5-shot | 5-shot | 0-shot | 5-shot |
| Baichuan-7B | - | 1.2T | | 42.32 (42.30) | 44.53 (44.02) | 38.72 | 36.74 | 42.80 |
| Baichuan-13B-Base | - | 1.4T | | 50.51 (51.60) | 55.73 (55.30) | 47.20 | 51.41 | 53.60 |
| Baichuan2-7B-Base | - | 2.6T | | 46.97 (54.16) | 57.67 (57.07) | 45.76 | 52.60 | 54.00 |
| Baichuan2-13B-Base | - | 2.6T | | 54.84 (59.17) | 62.62 (61.97) | 52.08 | 58.25 | 58.10 |
| ChatGLM-6B | - | 1.0T | | 39.67 (40.63) | 41.17 (-) | 40.10 | 36.53 | 38.90 |
| ChatGLM2-6B | - | 1.4T | | 44.74 (45.46) | 49.40 (-) | 46.36 | 45.49 | 51.70 |
| InternLM-7B | - | 1.6T | | 46.70 (51.00) | 52.00 (-) | 44.77 | 61.64 | 52.80 |
| Qwen-7B | - | 2.2T | | 54.29 (56.70) | 56.03 (58.80) | 52.47 | 56.42 | 59.60 |
| | | | | | | | | |
| Llama-2-7B | - | 2.0T | | 44.47 (45.30) | 32.97 (-) | 32.60 | 25.46 | - |
| Linly-AI/Chinese-LLaMA-2-7B-hf | Llama-2-7B | 1.0T | | 37.43 | 29.92 | 32.00 | 27.57 | - |
| wenge-research/yayi-7b-llama2 | Llama-2-7B | - | | 38.56 | 31.52 | 30.99 | 25.95 | - |
| ziqingyang/chinese-llama-2-7b | Llama-2-7B | - | | 33.86 | 34.69 | 34.52 | 25.18 | 34.2 |
| TigerResearch/tigerbot-7b-base | Llama-2-7B | 0.3T | | 43.73 | 42.04 | 37.64 | 30.61 | - |
| LinkSoul/Chinese-Llama-2-7b | Llama-2-7B | - | | 48.41 | 38.31 | 38.45 | 27.72 | - |
| FlagAlpha/Atom-7B | Llama-2-7B | 0.1T | | 49.96 | 41.10 | 39.83 | 33.00 | - |
| IDEA-CCNL/Ziya-LLaMA-13B-v1.1 | Llama-13B | 0.11T | | 50.25 | 40.99 | 40.04 | 30.54 | - |
| | | | | | | | | |
| **Colossal-LLaMA-2-7b-base** | Llama-2-7B | **0.0085T** | | 53.06 | 49.89 | 51.48 | 58.82 | 50.2 |
### ColossalChat ### ColossalChat
<div align="center"> <div align="center">
<a href="https://chat.colossalai.org/"> <a href="https://www.youtube.com/watch?v=HcTiHzApHm0">
<img src="https://raw.githubusercontent.com/hpcaitech/public_assets/main/colossalai/img/Chat-demo.png" width="700" /> <img src="https://raw.githubusercontent.com/hpcaitech/public_assets/main/applications/chat/ColossalChat%20YouTube.png" width="700" />
</a> </a>
</div> </div>
[ColossalChat](https://github.com/hpcaitech/ColossalAI/tree/main/applications/Chat): An open-source solution for cloning [ChatGPT](https://openai.com/blog/chatgpt/) with a complete RLHF pipeline. [[code]](https://github.com/hpcaitech/ColossalAI/tree/main/applications/Chat) [[blog]](https://medium.com/@yangyou_berkeley/colossalchat-an-open-source-solution-for-cloning-chatgpt-with-a-complete-rlhf-pipeline-5edf08fb538b) [[demo]](https://chat.colossalai.org) [ColossalChat](https://github.com/hpcaitech/ColossalAI/tree/main/applications/Chat): An open-source solution for cloning [ChatGPT](https://openai.com/blog/chatgpt/) with a complete RLHF pipeline.
[[code]](https://github.com/hpcaitech/ColossalAI/tree/main/applications/Chat)
[[blog]](https://medium.com/@yangyou_berkeley/colossalchat-an-open-source-solution-for-cloning-chatgpt-with-a-complete-rlhf-pipeline-5edf08fb538b)
[[demo]](https://www.youtube.com/watch?v=HcTiHzApHm0)
[[tutorial]](https://www.youtube.com/watch?v=-qFBZFmOJfg)
<p id="ColossalChat-Speed" align="center">
<img src="https://raw.githubusercontent.com/hpcaitech/public_assets/main/applications/chat/ColossalChat%20Speed.jpg" width=450/>
</p>
- Up to 10 times faster for RLHF PPO Stage3 Training
<p id="ColossalChat_scaling" align="center"> <p id="ColossalChat_scaling" align="center">
<img src="https://raw.githubusercontent.com/hpcaitech/public_assets/main/applications/chatgpt/ChatGPT%20scaling.png" width=800/> <img src="https://raw.githubusercontent.com/hpcaitech/public_assets/main/applications/chatgpt/ChatGPT%20scaling.png" width=800/>
...@@ -205,6 +248,23 @@ Acceleration of [AlphaFold Protein Structure](https://alphafold.ebi.ac.uk/) ...@@ -205,6 +248,23 @@ Acceleration of [AlphaFold Protein Structure](https://alphafold.ebi.ac.uk/)
<p align="right">(<a href="#top">back to top</a>)</p> <p align="right">(<a href="#top">back to top</a>)</p>
## Parallel Training Demo ## Parallel Training Demo
### LLaMA2
<p align="center">
<img src="https://raw.githubusercontent.com/hpcaitech/public_assets/main/colossalai/img/llama2_pretraining.png" width=600/>
</p>
- 70 billion parameter LLaMA2 model training accelerated by 195%
[[code]](https://github.com/hpcaitech/ColossalAI/tree/main/examples/language/llama2)
[[blog]](https://www.hpc-ai.tech/blog/70b-llama2-training)
### LLaMA1
<p align="center">
<img src="https://raw.githubusercontent.com/hpcaitech/public_assets/main/examples/images/LLaMA_pretraining.png" width=600/>
</p>
- 65-billion-parameter large model pretraining accelerated by 38%
[[code]](https://github.com/hpcaitech/ColossalAI/tree/example/llama/examples/language/llama)
[[blog]](https://www.hpc-ai.tech/blog/large-model-pretraining)
### GPT-3 ### GPT-3
<p align="center"> <p align="center">
...@@ -352,6 +412,22 @@ If you want to install and enable CUDA kernel fusion (compulsory installation wh ...@@ -352,6 +412,22 @@ If you want to install and enable CUDA kernel fusion (compulsory installation wh
CUDA_EXT=1 pip install . CUDA_EXT=1 pip install .
``` ```
For users with CUDA 10.2, you can still build ColossalAI from source. However, you need to manually download the cub library and copy it to the corresponding directory.
```bash
# clone the repository
git clone https://github.com/hpcaitech/ColossalAI.git
cd ColossalAI
# download the cub library
wget https://github.com/NVIDIA/cub/archive/refs/tags/1.8.0.zip
unzip 1.8.0.zip
cp -r cub-1.8.0/cub/ colossalai/kernel/cuda_native/csrc/kernels/include/
# install
CUDA_EXT=1 pip install .
```
<p align="right">(<a href="#top">back to top</a>)</p> <p align="right">(<a href="#top">back to top</a>)</p>
## Use Docker ## Use Docker
...@@ -426,6 +502,7 @@ To cite this project, you can use the following BibTeX citation. ...@@ -426,6 +502,7 @@ To cite this project, you can use the following BibTeX citation.
} }
``` ```
Colossal-AI has been accepted as official tutorial by top conferences [SC](https://sc22.supercomputing.org/), [AAAI](https://aaai.org/Conferences/AAAI-23/), [PPoPP](https://ppopp23.sigplan.org/), [CVPR](https://cvpr2023.thecvf.com/), [ISC](https://www.isc-hpc.com/), etc. Colossal-AI has been accepted as official tutorial by top conferences [NeurIPS](https://nips.cc/), [SC](https://sc22.supercomputing.org/), [AAAI](https://aaai.org/Conferences/AAAI-23/),
[PPoPP](https://ppopp23.sigplan.org/), [CVPR](https://cvpr2023.thecvf.com/), [ISC](https://www.isc-hpc.com/), [NVIDIA GTC](https://www.nvidia.com/en-us/on-demand/session/gtcspring23-S51482/), etc.
<p align="right">(<a href="#top">back to top</a>)</p> <p align="right">(<a href="#top">back to top</a>)</p>
...@@ -145,4 +145,4 @@ docs/.build ...@@ -145,4 +145,4 @@ docs/.build
# wandb log # wandb log
example/wandb/ example/wandb/
examples/awesome-chatgpt-prompts/ examples/awesome-chatgpt-prompts/
\ No newline at end of file
...@@ -4,7 +4,6 @@ ...@@ -4,7 +4,6 @@
<span>ColossalChat</span> <span>ColossalChat</span>
</h1> </h1>
## Table of Contents ## Table of Contents
- [Table of Contents](#table-of-contents) - [Table of Contents](#table-of-contents)
...@@ -34,7 +33,9 @@ ...@@ -34,7 +33,9 @@
- [Authors](#authors) - [Authors](#authors)
- [Citations](#citations) - [Citations](#citations)
- [Licenses](#licenses) - [Licenses](#licenses)
--- ---
## What is ColossalChat and Coati ? ## What is ColossalChat and Coati ?
[ColossalChat](https://github.com/hpcaitech/ColossalAI/tree/main/applications/Chat) is the project to implement LLM with RLHF, powered by the [Colossal-AI](https://github.com/hpcaitech/ColossalAI) project. [ColossalChat](https://github.com/hpcaitech/ColossalAI/tree/main/applications/Chat) is the project to implement LLM with RLHF, powered by the [Colossal-AI](https://github.com/hpcaitech/ColossalAI) project.
...@@ -42,6 +43,7 @@ ...@@ -42,6 +43,7 @@
Coati stands for `ColossalAI Talking Intelligence`. It is the name for the module implemented in this project and is also the name of the large language model developed by the ColossalChat project. Coati stands for `ColossalAI Talking Intelligence`. It is the name for the module implemented in this project and is also the name of the large language model developed by the ColossalChat project.
The Coati package provides a unified large language model framework that has implemented the following functions The Coati package provides a unified large language model framework that has implemented the following functions
- Supports comprehensive large-model training acceleration capabilities for ColossalAI, without requiring knowledge of complex distributed training algorithms - Supports comprehensive large-model training acceleration capabilities for ColossalAI, without requiring knowledge of complex distributed training algorithms
- Supervised datasets collection - Supervised datasets collection
- Supervised instructions fine-tuning - Supervised instructions fine-tuning
...@@ -56,29 +58,42 @@ The Coati package provides a unified large language model framework that has imp ...@@ -56,29 +58,42 @@ The Coati package provides a unified large language model framework that has imp
<img src="https://raw.githubusercontent.com/hpcaitech/public_assets/main/applications/chatgpt/chatgpt.png" width=700/> <img src="https://raw.githubusercontent.com/hpcaitech/public_assets/main/applications/chatgpt/chatgpt.png" width=700/>
</p> </p>
Image source: https://openai.com/blog/chatgpt Image source: https://openai.com/blog/chatgpt
</div> </div>
**As Colossal-AI is undergoing some major updates, this project will be actively maintained to stay in line with the Colossal-AI project.** **As Colossal-AI is undergoing some major updates, this project will be actively maintained to stay in line with the Colossal-AI project.**
More details can be found in the latest news. More details can be found in the latest news.
* [2023/03] [ColossalChat: An Open-Source Solution for Cloning ChatGPT With a Complete RLHF Pipeline](https://medium.com/@yangyou_berkeley/colossalchat-an-open-source-solution-for-cloning-chatgpt-with-a-complete-rlhf-pipeline-5edf08fb538b)
* [2023/02] [Open Source Solution Replicates ChatGPT Training Process! Ready to go with only 1.6GB GPU Memory](https://www.hpc-ai.tech/blog/colossal-ai-chatgpt) - [2023/03] [ColossalChat: An Open-Source Solution for Cloning ChatGPT With a Complete RLHF Pipeline](https://medium.com/@yangyou_berkeley/colossalchat-an-open-source-solution-for-cloning-chatgpt-with-a-complete-rlhf-pipeline-5edf08fb538b)
- [2023/02] [Open Source Solution Replicates ChatGPT Training Process! Ready to go with only 1.6GB GPU Memory](https://www.hpc-ai.tech/blog/colossal-ai-chatgpt)
## Online demo ## Online demo
You can experience the performance of Coati7B on this page.
[chat.colossalai.org](https://chat.colossalai.org/) <div align="center">
<a href="https://www.youtube.com/watch?v=HcTiHzApHm0">
<img src="https://raw.githubusercontent.com/hpcaitech/public_assets/main/applications/chat/ColossalChat%20YouTube.png" width="700" />
</a>
</div>
[ColossalChat](https://github.com/hpcaitech/ColossalAI/tree/main/applications/Chat): An open-source solution for cloning [ChatGPT](https://openai.com/blog/chatgpt/) with a complete RLHF pipeline.
[[code]](https://github.com/hpcaitech/ColossalAI/tree/main/applications/Chat)
[[blog]](https://medium.com/@yangyou_berkeley/colossalchat-an-open-source-solution-for-cloning-chatgpt-with-a-complete-rlhf-pipeline-5edf08fb538b)
[[demo]](https://www.youtube.com/watch?v=HcTiHzApHm0)
[[tutorial]](https://www.youtube.com/watch?v=-qFBZFmOJfg)
Due to resource constraints, we will only provide this service from 29th Mar 2023 to 5 April 2023. However, we have provided the inference code in the [inference](./inference/) folder. The WebUI will be open-sourced soon as well. <p id="ColossalChat-Speed" align="center">
<img src="https://raw.githubusercontent.com/hpcaitech/public_assets/main/applications/chat/ColossalChat%20Speed.jpg" width=450/>
</p>
> DeepSpeedChat performance figures come from its blog published on April 12, 2023; ColossalChat performance can be reproduced on an AWS p4d.24xlarge node with 8 A100-40G GPUs using the following command: `torchrun --standalone --nproc_per_node 8 benchmark_opt_lora_dummy.py --num_collect_steps 1 --use_kernels --strategy colossalai_zero2 --experience_batch_size 64 --train_batch_size 32`
> Warning: Due to model and dataset size limitations, Coati is just a baby model; Coati7B may output incorrect information and lacks the ability for multi-turn dialogue. There is still significant room for improvement.
## Install ## Install
### Install the environment ### Install the environment
```shell ```bash
conda create -n coati conda create -n coati
conda activate coati conda activate coati
git clone https://github.com/hpcaitech/ColossalAI.git git clone https://github.com/hpcaitech/ColossalAI.git
...@@ -87,22 +102,20 @@ pip install . ...@@ -87,22 +102,20 @@ pip install .
``` ```
### Install the Transformers ### Install the Transformers
Given Hugging Face hasn't officially supported the LLaMA models, We fork a branch of Transformers that can be compatible with our code
```shell ```bash
git clone https://github.com/hpcaitech/transformers pip install transformers==4.30.2
cd transformers
pip install .
``` ```
## How to use? ## How to use?
### Supervised datasets collection ### Supervised datasets collection
we collected 104K bilingual datasets of Chinese and English, and you can find the datasets in this repo We collected 104K bilingual datasets of Chinese and English, and you can find the datasets in this repo
[InstructionWild](https://github.com/XueFuzhao/InstructionWild) [InstructionWild](https://github.com/XueFuzhao/InstructionWild) and in this [file](https://github.com/XueFuzhao/InstructionWild/blob/main/data/README.md).
Here is how we collected the data Here is how we collected the data
<p align="center"> <p align="center">
<img src="https://raw.githubusercontent.com/hpcaitech/public_assets/main/applications/chat/data-collect.png" width=500/> <img src="https://raw.githubusercontent.com/hpcaitech/public_assets/main/applications/chat/data-collect.png" width=500/>
</p> </p>
...@@ -112,12 +125,28 @@ Here is how we collected the data ...@@ -112,12 +125,28 @@ Here is how we collected the data
Stage1 is supervised instructs fine-tuning, which uses the datasets mentioned earlier to fine-tune the model. Stage1 is supervised instructs fine-tuning, which uses the datasets mentioned earlier to fine-tune the model.
You can run the `examples/train_sft.sh` to start a supervised instructs fine-tuning. You can run the `examples/train_sft.sh` to start a supervised instructs fine-tuning.
[[Stage1 tutorial video]](https://www.youtube.com/watch?v=-qFBZFmOJfg)
**Note**: the supervised dataset must be in the following format:
```json
[
{
"instruction": "Provide a list of the top 10 most popular mobile games in Asia",
"input": "",
"output": "The top 10 most popular mobile games in Asia are:\n1) PUBG Mobile\n2) Pokemon Go\n3) Candy Crush Saga\n4) Free Fire\n5) Clash of Clans\n6) Mario Kart Tour\n7) Arena of Valor\n8) Fantasy Westward Journey\n9) Subway Surfers\n10) ARK Survival Evolved",
"id": 0
},
...
]
```
### RLHF Training Stage2 - Training reward model ### RLHF Training Stage2 - Training reward model
Stage2 trains a reward model, which obtains corresponding scores by manually ranking different outputs for the same prompt and supervises the training of the reward model Stage2 trains a reward model, which obtains corresponding scores by manually ranking different outputs for the same prompt and supervises the training of the reward model
You can run the `examples/train_rm.sh` to start a reward model training. You can run the `examples/train_rm.sh` to start a reward model training.
[[Stage2 tutorial video]](https://www.youtube.com/watch?v=gMx2CApKhuo)
### RLHF Training Stage3 - Training model with reinforcement learning by human feedback ### RLHF Training Stage3 - Training model with reinforcement learning by human feedback
...@@ -128,6 +157,39 @@ Stage3 uses reinforcement learning algorithm, which is the most complex part of ...@@ -128,6 +157,39 @@ Stage3 uses reinforcement learning algorithm, which is the most complex part of
</p> </p>
You can run the `examples/train_prompts.sh` to start training PPO with human feedback. You can run the `examples/train_prompts.sh` to start training PPO with human feedback.
[[Stage3 tutorial video]](https://www.youtube.com/watch?v=Z8wwSHxPL9g)
**Note**: the required datasets must be in the following formats:
- `pretrain dataset`
```json
[
{
"instruction": "Provide a list of the top 10 most popular mobile games in Asia",
"input": "",
"output": "The top 10 most popular mobile games in Asia are:\n1) PUBG Mobile\n2) Pokemon Go\n3) Candy Crush Saga\n4) Free Fire\n5) Clash of Clans\n6) Mario Kart Tour\n7) Arena of Valor\n8) Fantasy Westward Journey\n9) Subway Surfers\n10) ARK Survival Evolved",
"id": 0
},
...
]
```
- `prompt dataset`
```json
[
{
"instruction": "Edit this paragraph to make it more concise: \"Yesterday, I went to the store and bought some things. Then, I came home and put them away. After that, I went for a walk and met some friends.\"",
"id": 0
},
{
"instruction": "Write a descriptive paragraph about a memorable vacation you went on",
"id": 1
},
...
]
```
For more details, see [`examples/`](https://github.com/hpcaitech/ColossalAI/tree/main/applications/Chat/examples). For more details, see [`examples/`](https://github.com/hpcaitech/ColossalAI/tree/main/applications/Chat/examples).
...@@ -135,9 +197,9 @@ For more details, see [`examples/`](https://github.com/hpcaitech/ColossalAI/tree ...@@ -135,9 +197,9 @@ For more details, see [`examples/`](https://github.com/hpcaitech/ColossalAI/tree
We provide an online inference server and a benchmark. We aim to run inference on single GPU, so quantization is essential when using large models. We provide an online inference server and a benchmark. We aim to run inference on single GPU, so quantization is essential when using large models.
We support 8-bit quantization (RTN), 4-bit quantization (GPTQ), and FP16 inference. You can We support 8-bit quantization (RTN), 4-bit quantization (GPTQ), and FP16 inference.
Online inference server scripts can help you deploy your own services.
Online inference server scripts can help you deploy your own services.
For more details, see [`inference/`](https://github.com/hpcaitech/ColossalAI/tree/main/applications/Chat/inference). For more details, see [`inference/`](https://github.com/hpcaitech/ColossalAI/tree/main/applications/Chat/inference).
## Coati7B examples ## Coati7B examples
...@@ -147,6 +209,7 @@ For more details, see [`inference/`](https://github.com/hpcaitech/ColossalAI/tre ...@@ -147,6 +209,7 @@ For more details, see [`inference/`](https://github.com/hpcaitech/ColossalAI/tre
<details><summary><b>E-mail</b></summary> <details><summary><b>E-mail</b></summary>
![phd](https://raw.githubusercontent.com/hpcaitech/public_assets/main/applications/chat/Phd.png) ![phd](https://raw.githubusercontent.com/hpcaitech/public_assets/main/applications/chat/Phd.png)
</details> </details>
<details><summary><b>coding</b></summary> <details><summary><b>coding</b></summary>
...@@ -180,6 +243,7 @@ For more details, see [`inference/`](https://github.com/hpcaitech/ColossalAI/tre ...@@ -180,6 +243,7 @@ For more details, see [`inference/`](https://github.com/hpcaitech/ColossalAI/tre
</details> </details>
### Open QA ### Open QA
<details><summary><b>Game</b></summary> <details><summary><b>Game</b></summary>
![Game](https://raw.githubusercontent.com/hpcaitech/public_assets/main/applications/chat/game.png) ![Game](https://raw.githubusercontent.com/hpcaitech/public_assets/main/applications/chat/game.png)
...@@ -213,6 +277,7 @@ For more details, see [`inference/`](https://github.com/hpcaitech/ColossalAI/tre ...@@ -213,6 +277,7 @@ For more details, see [`inference/`](https://github.com/hpcaitech/ColossalAI/tre
You can find more examples in this [repo](https://github.com/XueFuzhao/InstructionWild/blob/main/comparison.md). You can find more examples in this [repo](https://github.com/XueFuzhao/InstructionWild/blob/main/comparison.md).
### Limitation ### Limitation
<details><summary><b>Limitation for LLaMA-finetuned models</b></summary> <details><summary><b>Limitation for LLaMA-finetuned models</b></summary>
- Both Alpaca and ColossalChat are based on LLaMA. It is hard to compensate for the missing knowledge in the pre-training stage. - Both Alpaca and ColossalChat are based on LLaMA. It is hard to compensate for the missing knowledge in the pre-training stage.
- Lack of counting ability: Cannot count the number of items in a list. - Lack of counting ability: Cannot count the number of items in a list.
...@@ -236,7 +301,7 @@ You can find more examples in this [repo](https://github.com/XueFuzhao/Instructi ...@@ -236,7 +301,7 @@ You can find more examples in this [repo](https://github.com/XueFuzhao/Instructi
We have integrated the Transformers save and load pipeline, allowing users to freely call Hugging Face's language models and save them in the HF format. We have integrated the Transformers save and load pipeline, allowing users to freely call Hugging Face's language models and save them in the HF format.
``` ```python
from coati.models.llama import LlamaLM from coati.models.llama import LlamaLM
from coati.trainer import SFTTrainer from coati.trainer import SFTTrainer
...@@ -245,20 +310,20 @@ tokenizer = AutoTokenizer.from_pretrained(args.pretrain) ...@@ -245,20 +310,20 @@ tokenizer = AutoTokenizer.from_pretrained(args.pretrain)
(model, optim) = strategy.prepare((model, optim)) (model, optim) = strategy.prepare((model, optim))
trainer = SFTTrainer(model=model, trainer = SFTTrainer(model=model,
strategy=strategy, strategy=strategy,
optim=optim, optim=optim,
train_dataloader=train_dataloader, train_dataloader=train_dataloader,
eval_dataloader=eval_dataloader, eval_dataloader=eval_dataloader,
batch_size=args.batch_size, batch_size=args.batch_size,
max_epochs=args.max_epochs, max_epochs=args.max_epochs,
accumulation_steps = args.accumulation_steps accumulation_steps=args.accumulation_steps
) )
trainer.fit() trainer.fit()
# this saves in pytorch format # this saves in pytorch format
strategy.save_model(model, args.save_path, only_rank0=True) strategy.save_model(model, args.save_path, only_rank0=True)
# this saves in HF format. ColossalAI strategy with stage-3 doesn't support this method # this saves in HF format
strategy.save_pretrained(model, args.save_path, only_rank0=True, tokenizer=tokenizer) strategy.save_pretrained(model, args.save_path, only_rank0=True, tokenizer=tokenizer)
``` ```
...@@ -269,12 +334,13 @@ strategy.save_pretrained(model, args.save_path, only_rank0=True, tokenizer=token ...@@ -269,12 +334,13 @@ strategy.save_pretrained(model, args.save_path, only_rank0=True, tokenizer=token
Here are some examples that can allow you to train a 7B model on a single or multiple consumer-grade GPUs. Here are some examples that can allow you to train a 7B model on a single or multiple consumer-grade GPUs.
If you only have a single 24G GPU, you can use the following script. `batch_size`, `lora_rank` and `grad_checkpoint` are the most important parameters to successfully train the model. If you only have a single 24G GPU, you can use the following script. `batch_size`, `lora_rank` and `grad_checkpoint` are the most important parameters to successfully train the model.
```
```bash
// [INFO]: MAX GPU MEMORY ALLOCATED: 19148.9345703125 MB
torchrun --standalone --nproc_per_node=1 train_sft.py \ torchrun --standalone --nproc_per_node=1 train_sft.py \
--pretrain "/path/to/LLaMa-7B/" \ --pretrain "/path/to/LLaMa-7B/" \
--model 'llama' \ --model 'llama' \
--strategy naive \ --strategy ddp \
--log_interval 10 \
--save_path /path/to/Coati-7B \ --save_path /path/to/Coati-7B \
--dataset /path/to/data.json \ --dataset /path/to/data.json \
--batch_size 1 \ --batch_size 1 \
...@@ -287,12 +353,12 @@ torchrun --standalone --nproc_per_node=1 train_sft.py \ ...@@ -287,12 +353,12 @@ torchrun --standalone --nproc_per_node=1 train_sft.py \
``` ```
`colossalai_gemini` strategy can enable a single 24G GPU to train the whole model without using LoRA if you have sufficient CPU memory. You can use the following script. `colossalai_gemini` strategy can enable a single 24G GPU to train the whole model without using LoRA if you have sufficient CPU memory. You can use the following script.
```
```bash
torchrun --standalone --nproc_per_node=1 train_sft.py \ torchrun --standalone --nproc_per_node=1 train_sft.py \
--pretrain "/path/to/LLaMa-7B/" \ --pretrain "/path/to/LLaMa-7B/" \
--model 'llama' \ --model 'llama' \
--strategy colossalai_gemini \ --strategy colossalai_gemini \
--log_interval 10 \
--save_path /path/to/Coati-7B \ --save_path /path/to/Coati-7B \
--dataset /path/to/data.json \ --dataset /path/to/data.json \
--batch_size 1 \ --batch_size 1 \
...@@ -304,12 +370,12 @@ torchrun --standalone --nproc_per_node=1 train_sft.py \ ...@@ -304,12 +370,12 @@ torchrun --standalone --nproc_per_node=1 train_sft.py \
``` ```
If you have 4x32 GB GPUs, you can even train the whole 7B model using our `colossalai_zero2_cpu` strategy! The script is given as follows. If you have 4x32 GB GPUs, you can even train the whole 7B model using our `colossalai_zero2_cpu` strategy! The script is given as follows.
```
```bash
torchrun --standalone --nproc_per_node=4 train_sft.py \ torchrun --standalone --nproc_per_node=4 train_sft.py \
--pretrain "/path/to/LLaMa-7B/" \ --pretrain "/path/to/LLaMa-7B/" \
--model 'llama' \ --model 'llama' \
--strategy colossalai_zero2_cpu \ --strategy colossalai_zero2_cpu \
--log_interval 10 \
--save_path /path/to/Coati-7B \ --save_path /path/to/Coati-7B \
--dataset /path/to/data.json \ --dataset /path/to/data.json \
--batch_size 1 \ --batch_size 1 \
...@@ -319,8 +385,8 @@ torchrun --standalone --nproc_per_node=4 train_sft.py \ ...@@ -319,8 +385,8 @@ torchrun --standalone --nproc_per_node=4 train_sft.py \
--max_epochs 1 \ --max_epochs 1 \
--grad_checkpoint --grad_checkpoint
``` ```
</details>
</details>
## The Plan ## The Plan
...@@ -335,31 +401,33 @@ torchrun --standalone --nproc_per_node=4 train_sft.py \ ...@@ -335,31 +401,33 @@ torchrun --standalone --nproc_per_node=4 train_sft.py \
- [ ] support chain-of-thought by [langchain](https://github.com/hwchase17/langchain) - [ ] support chain-of-thought by [langchain](https://github.com/hwchase17/langchain)
### Real-time progress ### Real-time progress
You will find our progress in github project broad
[Coati](https://github.com/orgs/hpcaitech/projects/17/views/1) You will find our progress in the GitHub [project board](https://github.com/orgs/hpcaitech/projects/17/views/1).
## Invitation to open-source contribution ## Invitation to open-source contribution
Referring to the successful attempts of [BLOOM](https://bigscience.huggingface.co/) and [Stable Diffusion](https://en.wikipedia.org/wiki/Stable_Diffusion), any and all developers and partners with computing powers, datasets, models are welcome to join and build the Colossal-AI community, making efforts towards the era of big AI models from the starting point of replicating ChatGPT! Referring to the successful attempts of [BLOOM](https://bigscience.huggingface.co/) and [Stable Diffusion](https://en.wikipedia.org/wiki/Stable_Diffusion), any and all developers and partners with computing powers, datasets, models are welcome to join and build the Colossal-AI community, making efforts towards the era of big AI models from the starting point of replicating ChatGPT!
You may contact us or participate in the following ways: You may contact us or participate in the following ways:
1. [Leaving a Star ⭐](https://github.com/hpcaitech/ColossalAI/stargazers) to show your like and support. Thanks! 1. [Leaving a Star ⭐](https://github.com/hpcaitech/ColossalAI/stargazers) to show your like and support. Thanks!
2. Posting an [issue](https://github.com/hpcaitech/ColossalAI/issues/new/choose), or submitting a PR on GitHub follow the guideline in [Contributing](https://github.com/hpcaitech/ColossalAI/blob/main/CONTRIBUTING.md). 2. Posting an [issue](https://github.com/hpcaitech/ColossalAI/issues/new/choose), or submitting a PR on GitHub follow the guideline in [Contributing](https://github.com/hpcaitech/ColossalAI/blob/main/CONTRIBUTING.md).
3. Join the Colossal-AI community on 3. Join the Colossal-AI community on
[Slack](https://join.slack.com/t/colossalaiworkspace/shared_invite/zt-z7b26eeb-CBp7jouvu~r0~lcFzX832w), [Slack](https://github.com/hpcaitech/public_assets/tree/main/colossalai/contact/slack),
and [WeChat(微信)](https://raw.githubusercontent.com/hpcaitech/public_assets/main/colossalai/img/WeChat.png "qrcode") to share your ideas. and [WeChat(微信)](https://raw.githubusercontent.com/hpcaitech/public_assets/main/colossalai/img/WeChat.png "qrcode") to share your ideas.
4. Send your official proposal to email contact@hpcaitech.com 4. Send your official proposal to email contact@hpcaitech.com
Thanks so much to all of our amazing contributors! Thanks so much to all of our amazing contributors!
## Quick Preview ## Quick Preview
<div align="center"> <div align="center">
<a href="https://chat.colossalai.org/"> <a href="https://chat.colossalai.org/">
<img src="https://raw.githubusercontent.com/hpcaitech/public_assets/main/colossalai/img/Chat-demo.png" width="700" /> <img src="https://raw.githubusercontent.com/hpcaitech/public_assets/main/colossalai/img/Chat-demo.png" width="700" />
</a> </a>
</div> </div>
- An open-source low cost solution for cloning [ChatGPT](https://openai.com/blog/chatgpt/) with a complete RLHF pipeline. [[demo]](https://chat.colossalai.org) - An open-source low-cost solution for cloning [ChatGPT](https://openai.com/blog/chatgpt/) with a complete RLHF pipeline. [[demo]](https://chat.colossalai.org)
<p id="ChatGPT_scaling" align="center"> <p id="ChatGPT_scaling" align="center">
<img src="https://raw.githubusercontent.com/hpcaitech/public_assets/main/applications/chatgpt/ChatGPT%20scaling.png" width=800/> <img src="https://raw.githubusercontent.com/hpcaitech/public_assets/main/applications/chatgpt/ChatGPT%20scaling.png" width=800/>
...@@ -386,18 +454,21 @@ Thanks so much to all of our amazing contributors! ...@@ -386,18 +454,21 @@ Thanks so much to all of our amazing contributors!
| Better Cases | 38 ⚔ **41** | **45** ⚔ 33 | | Better Cases | 38 ⚔ **41** | **45** ⚔ 33 |
| Win Rate | 48% ⚔ **52%** | **58%** ⚔ 42% | | Win Rate | 48% ⚔ **52%** | **58%** ⚔ 42% |
| Average Score | 7.06 ⚔ **7.13** | **7.31** ⚔ 6.82 | | Average Score | 7.06 ⚔ **7.13** | **7.31** ⚔ 6.82 |
- Our Coati-7B model performs better than Alpaca-7B when using GPT-4 to evaluate model performance. The Coati-7B model we evaluate is an old version we trained a few weeks ago and the new version is around the corner. - Our Coati-7B model performs better than Alpaca-7B when using GPT-4 to evaluate model performance. The Coati-7B model we evaluate is an old version we trained a few weeks ago and the new version is around the corner.
## Authors ## Authors
Coati is developed by ColossalAI Team: Coati is developed by ColossalAI Team:
- [Fazzie](https://fazzie-key.cool/about/index.html) - [Fazzie](https://fazzie-key.cool/about/index.html)
- [FrankLeeeee](https://github.com/FrankLeeeee) - [FrankLeeeee](https://github.com/FrankLeeeee)
- [BlueRum](https://github.com/ht-zhou) - [BlueRum](https://github.com/ht-zhou)
- [ver217](https://github.com/ver217) - [ver217](https://github.com/ver217)
- [ofey404](https://github.com/ofey404) - [ofey404](https://github.com/ofey404)
- [Wenhao Chen](https://github.com/CWHer)
The Phd student from [(HPC-AI) Lab](https://ai.comp.nus.edu.sg/) also contributed a lot to this project. The PhD students from [(HPC-AI) Lab](https://ai.comp.nus.edu.sg/) also contributed a lot to this project.
- [Zangwei Zheng](https://github.com/zhengzangw) - [Zangwei Zheng](https://github.com/zhengzangw)
- [Xue Fuzhao](https://github.com/XueFuzhao) - [Xue Fuzhao](https://github.com/XueFuzhao)
......
...@@ -27,9 +27,12 @@ We also provide various training strategies: ...@@ -27,9 +27,12 @@ We also provide various training strategies:
We only support `torchrun` to launch now. E.g. We only support `torchrun` to launch now. E.g.
```shell ```bash
# run OPT-125M with no lora (lora_rank=0) on single-node single-GPU with min batch size # run OPT-125M with no lora (lora_rank=0) on single-node single-GPU with min batch size
torchrun --standalone --nproc_per_node 1 benchmark_opt_lora_dummy.py --model 125m --critic_model 125m --strategy ddp --experience_batch_size 1 --train_batch_size 1 --lora_rank 0 torchrun --standalone --nproc_per_node 1 benchmark_opt_lora_dummy.py \
--model 125m --critic_model 125m --strategy ddp \
--experience_batch_size 1 --train_batch_size 1 --lora_rank 0
# run Actor (OPT-1.3B) and Critic (OPT-350M) with lora_rank=4 on single-node 4-GPU # run Actor (OPT-1.3B) and Critic (OPT-350M) with lora_rank=4 on single-node 4-GPU
torchrun --standalone --nproc_per_node 4 benchmark_opt_lora_dummy.py --model 1.3b --critic_model 350m --strategy colossalai_zero2 --lora_rank 4 torchrun --standalone --nproc_per_node 4 benchmark_opt_lora_dummy.py \
--model 1.3b --critic_model 350m --strategy colossalai_zero2 --lora_rank 4
``` ```
...@@ -8,7 +8,7 @@ from coati.models.base import RewardModel ...@@ -8,7 +8,7 @@ from coati.models.base import RewardModel
from coati.models.opt import OPTActor, OPTCritic from coati.models.opt import OPTActor, OPTCritic
from coati.trainer import PPOTrainer from coati.trainer import PPOTrainer
from coati.trainer.callbacks import PerformanceEvaluator from coati.trainer.callbacks import PerformanceEvaluator
from coati.trainer.strategies import ColossalAIStrategy, DDPStrategy, Strategy from coati.trainer.strategies import DDPStrategy, GeminiStrategy, LowLevelZeroStrategy, Strategy
from torch.optim import Adam from torch.optim import Adam
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from transformers import AutoTokenizer from transformers import AutoTokenizer
...@@ -19,7 +19,7 @@ from colossalai.nn.optimizer import HybridAdam ...@@ -19,7 +19,7 @@ from colossalai.nn.optimizer import HybridAdam
def get_model_numel(model: nn.Module, strategy: Strategy) -> int: def get_model_numel(model: nn.Module, strategy: Strategy) -> int:
numel = sum(p.numel() for p in model.parameters()) numel = sum(p.numel() for p in model.parameters())
if isinstance(strategy, ColossalAIStrategy) and strategy.stage == 3 and strategy.shard_init: if isinstance(strategy, GeminiStrategy) and strategy.shard_init:
numel *= dist.get_world_size() numel *= dist.get_world_size()
return numel return numel
...@@ -27,7 +27,7 @@ def get_model_numel(model: nn.Module, strategy: Strategy) -> int: ...@@ -27,7 +27,7 @@ def get_model_numel(model: nn.Module, strategy: Strategy) -> int:
def preprocess_batch(samples) -> dict: def preprocess_batch(samples) -> dict:
input_ids = torch.stack(samples) input_ids = torch.stack(samples)
attention_mask = torch.ones_like(input_ids, dtype=torch.long) attention_mask = torch.ones_like(input_ids, dtype=torch.long)
return {'input_ids': input_ids, 'attention_mask': attention_mask} return {"input_ids": input_ids, "attention_mask": attention_mask}
def print_rank_0(*args, **kwargs) -> None: def print_rank_0(*args, **kwargs) -> None:
...@@ -39,32 +39,32 @@ def print_model_numel(model_dict: dict) -> None: ...@@ -39,32 +39,32 @@ def print_model_numel(model_dict: dict) -> None:
B = 1024**3 B = 1024**3
M = 1024**2 M = 1024**2
K = 1024 K = 1024
outputs = '' outputs = ""
for name, numel in model_dict.items(): for name, numel in model_dict.items():
outputs += f'{name}: ' outputs += f"{name}: "
if numel >= B: if numel >= B:
outputs += f'{numel / B:.2f} B\n' outputs += f"{numel / B:.2f} B\n"
elif numel >= M: elif numel >= M:
outputs += f'{numel / M:.2f} M\n' outputs += f"{numel / M:.2f} M\n"
elif numel >= K: elif numel >= K:
outputs += f'{numel / K:.2f} K\n' outputs += f"{numel / K:.2f} K\n"
else: else:
outputs += f'{numel}\n' outputs += f"{numel}\n"
print_rank_0(outputs) print_rank_0(outputs)
def get_gpt_config(model_name: str) -> OPTConfig: def get_gpt_config(model_name: str) -> OPTConfig:
model_map = { model_map = {
'125m': OPTConfig.from_pretrained('facebook/opt-125m'), "125m": OPTConfig.from_pretrained("facebook/opt-125m"),
'350m': OPTConfig(hidden_size=1024, ffn_dim=4096, num_hidden_layers=24, num_attention_heads=16), "350m": OPTConfig(hidden_size=1024, ffn_dim=4096, num_hidden_layers=24, num_attention_heads=16),
'700m': OPTConfig(hidden_size=1280, ffn_dim=5120, num_hidden_layers=36, num_attention_heads=20), "700m": OPTConfig(hidden_size=1280, ffn_dim=5120, num_hidden_layers=36, num_attention_heads=20),
'1.3b': OPTConfig.from_pretrained('facebook/opt-1.3b'), "1.3b": OPTConfig.from_pretrained("facebook/opt-1.3b"),
'2.7b': OPTConfig.from_pretrained('facebook/opt-2.7b'), "2.7b": OPTConfig.from_pretrained("facebook/opt-2.7b"),
'3.5b': OPTConfig(hidden_size=3072, ffn_dim=12288, num_hidden_layers=32, num_attention_heads=32), "3.5b": OPTConfig(hidden_size=3072, ffn_dim=12288, num_hidden_layers=32, num_attention_heads=32),
'5.5b': OPTConfig(hidden_size=3840, ffn_dim=15360, num_hidden_layers=32, num_attention_heads=32), "5.5b": OPTConfig(hidden_size=3840, ffn_dim=15360, num_hidden_layers=32, num_attention_heads=32),
'6.7b': OPTConfig.from_pretrained('facebook/opt-6.7b'), "6.7b": OPTConfig.from_pretrained("facebook/opt-6.7b"),
'10b': OPTConfig(hidden_size=5120, ffn_dim=20480, num_hidden_layers=32, num_attention_heads=32), "10b": OPTConfig(hidden_size=5120, ffn_dim=20480, num_hidden_layers=32, num_attention_heads=32),
'13b': OPTConfig.from_pretrained('facebook/opt-13b'), "13b": OPTConfig.from_pretrained("facebook/opt-13b"),
} }
try: try:
return model_map[model_name] return model_map[model_name]
...@@ -73,20 +73,20 @@ def get_gpt_config(model_name: str) -> OPTConfig: ...@@ -73,20 +73,20 @@ def get_gpt_config(model_name: str) -> OPTConfig:
def main(args): def main(args):
if args.strategy == 'ddp': if args.strategy == "ddp":
strategy = DDPStrategy() strategy = DDPStrategy()
elif args.strategy == 'colossalai_gemini': elif args.strategy == "colossalai_gemini":
strategy = ColossalAIStrategy(stage=3, placement_policy='cuda', initial_scale=2**5) strategy = GeminiStrategy(placement_policy="static",initial_scale=2**5)
elif args.strategy == 'colossalai_gemini_cpu': elif args.strategy == "colossalai_gemini_cpu":
strategy = ColossalAIStrategy(stage=3, placement_policy='cpu', initial_scale=2**5) strategy = GeminiStrategy(placement_policy="static", offload_optim_frac=1.0, offload_param_frac=1.0, initial_scale=2**5)
elif args.strategy == 'colossalai_zero2': elif args.strategy == "colossalai_zero2":
strategy = ColossalAIStrategy(stage=2, placement_policy='cuda') strategy = LowLevelZeroStrategy(stage=2, placement_policy="cuda")
elif args.strategy == 'colossalai_zero2_cpu': elif args.strategy == "colossalai_zero2_cpu":
strategy = ColossalAIStrategy(stage=2, placement_policy='cpu') strategy = LowLevelZeroStrategy(stage=2, placement_policy="cpu")
elif args.strategy == 'colossalai_zero1': elif args.strategy == "colossalai_zero1":
strategy = ColossalAIStrategy(stage=1, placement_policy='cuda') strategy = LowLevelZeroStrategy(stage=1, placement_policy="cuda")
elif args.strategy == 'colossalai_zero1_cpu': elif args.strategy == "colossalai_zero1_cpu":
strategy = ColossalAIStrategy(stage=1, placement_policy='cpu') strategy = LowLevelZeroStrategy(stage=1, placement_policy="cpu")
else: else:
raise ValueError(f'Unsupported strategy "{args.strategy}"') raise ValueError(f'Unsupported strategy "{args.strategy}"')
...@@ -103,92 +103,106 @@ def main(args): ...@@ -103,92 +103,106 @@ def main(args):
if args.use_kernels: if args.use_kernels:
from coati.kernels import convert_to_xformer_model from coati.kernels import convert_to_xformer_model
actor, critic, initial_model, reward_model = map(convert_to_xformer_model,
(actor, critic, initial_model, reward_model)) actor, critic, initial_model, reward_model = map(
convert_to_xformer_model, (actor, critic, initial_model, reward_model)
)
actor_numel = get_model_numel(actor, strategy) actor_numel = get_model_numel(actor, strategy)
critic_numel = get_model_numel(critic, strategy) critic_numel = get_model_numel(critic, strategy)
initial_model_numel = get_model_numel(initial_model, strategy) initial_model_numel = get_model_numel(initial_model, strategy)
reward_model_numel = get_model_numel(reward_model, strategy) reward_model_numel = get_model_numel(reward_model, strategy)
print_model_numel({ print_model_numel(
'Actor': actor_numel, {
'Critic': critic_numel, "Actor": actor_numel,
'Initial model': initial_model_numel, "Critic": critic_numel,
'Reward model': reward_model_numel "Initial model": initial_model_numel,
}) "Reward model": reward_model_numel,
performance_evaluator = PerformanceEvaluator(actor_numel, }
critic_numel, )
initial_model_numel, performance_evaluator = PerformanceEvaluator(
reward_model_numel, actor_numel,
enable_grad_checkpoint=False, critic_numel,
ignore_episodes=1) initial_model_numel,
reward_model_numel,
if args.strategy.startswith('colossalai'): enable_grad_checkpoint=False,
ignore_episodes=1,
)
if args.strategy.startswith("colossalai"):
actor_optim = HybridAdam(actor.parameters(), lr=5e-6) actor_optim = HybridAdam(actor.parameters(), lr=5e-6)
critic_optim = HybridAdam(critic.parameters(), lr=5e-6) critic_optim = HybridAdam(critic.parameters(), lr=5e-6)
else: else:
actor_optim = Adam(actor.parameters(), lr=5e-6) actor_optim = Adam(actor.parameters(), lr=5e-6)
critic_optim = Adam(critic.parameters(), lr=5e-6) critic_optim = Adam(critic.parameters(), lr=5e-6)
tokenizer = AutoTokenizer.from_pretrained('facebook/opt-350m') tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
tokenizer.pad_token = tokenizer.eos_token tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"
(actor, actor_optim), (critic, critic_optim) = strategy.prepare((actor, actor_optim), (critic, critic_optim)) (actor, actor_optim), (critic, critic_optim) = strategy.prepare((actor, actor_optim), (critic, critic_optim))
trainer = PPOTrainer(strategy,
actor,
critic,
reward_model,
initial_model,
actor_optim,
critic_optim,
ptx_coef=0,
max_epochs=args.max_epochs,
train_batch_size=args.train_batch_size,
offload_inference_models=args.offload_inference_models,
max_length=512,
do_sample=True,
temperature=1.0,
top_k=50,
use_cache=True,
pad_token_id=tokenizer.pad_token_id,
eos_token_id=tokenizer.eos_token_id,
callbacks=[performance_evaluator])
random_prompts = torch.randint(tokenizer.vocab_size, (1000, 256), device=torch.cuda.current_device()) random_prompts = torch.randint(tokenizer.vocab_size, (1000, 256), device=torch.cuda.current_device())
dataloader = DataLoader(random_prompts, dataloader = DataLoader(
batch_size=args.experience_batch_size, random_prompts, batch_size=args.experience_batch_size, shuffle=True, collate_fn=preprocess_batch
shuffle=True, )
collate_fn=preprocess_batch)
trainer = PPOTrainer(
trainer.fit(dataloader, strategy,
None, actor,
num_episodes=args.num_episodes, critic,
max_timesteps=args.max_timesteps, reward_model,
update_timesteps=args.update_timesteps) initial_model,
actor_optim,
print_rank_0(f'Peak CUDA mem: {torch.cuda.max_memory_allocated()/1024**3:.2f} GB') critic_optim,
tokenizer=tokenizer,
ptx_coef=0,
if __name__ == '__main__': train_batch_size=args.train_batch_size,
offload_inference_models=args.offload_inference_models,
max_length=512,
do_sample=True,
temperature=1.0,
top_k=50,
use_cache=True,
callbacks=[performance_evaluator],
)
trainer.fit(
prompt_dataloader=dataloader,
pretrain_dataloader=None,
num_episodes=args.num_episodes,
num_update_steps=args.num_update_steps,
num_collect_steps=args.num_collect_steps,
)
print_rank_0(f"Peak CUDA mem: {torch.cuda.max_memory_allocated()/1024**3:.2f} GB")
if __name__ == "__main__":
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--model', default='125m') parser.add_argument("--model", default="125m")
parser.add_argument('--critic_model', default='125m') parser.add_argument("--critic_model", default="125m")
parser.add_argument('--strategy', parser.add_argument(
choices=[ "--strategy",
'ddp', 'colossalai_gemini', 'colossalai_gemini_cpu', 'colossalai_zero2', choices=[
'colossalai_zero2_cpu', 'colossalai_zero1', 'colossalai_zero1_cpu' "ddp",
], "colossalai_gemini",
default='ddp') "colossalai_gemini_cpu",
parser.add_argument('--num_episodes', type=int, default=3) "colossalai_zero2",
parser.add_argument('--max_timesteps', type=int, default=8) "colossalai_zero2_cpu",
parser.add_argument('--update_timesteps', type=int, default=8) "colossalai_zero1",
parser.add_argument('--max_epochs', type=int, default=1) "colossalai_zero1_cpu",
parser.add_argument('--train_batch_size', type=int, default=8) ],
parser.add_argument('--experience_batch_size', type=int, default=8) default="ddp",
parser.add_argument('--lora_rank', type=int, default=0) )
parser.add_argument('--cuda_mem_frac', type=float, default=1.0) parser.add_argument("--num_episodes", type=int, default=3)
parser.add_argument('--offload_inference_models', action='store_true', default=False) parser.add_argument("--num_collect_steps", type=int, default=8)
parser.add_argument('--use_kernels', action='store_true', default=False) parser.add_argument("--num_update_steps", type=int, default=1)
parser.add_argument("--train_batch_size", type=int, default=8)
parser.add_argument("--experience_batch_size", type=int, default=8)
parser.add_argument("--lora_rank", type=int, default=0)
parser.add_argument("--cuda_mem_frac", type=float, default=1.0)
parser.add_argument("--offload_inference_models", action="store_true", default=False)
parser.add_argument("--use_kernels", action="store_true", default=False)
args = parser.parse_args() args = parser.parse_args()
main(args) main(args)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment