Commit 9e768b59 authored by zhuwenwen
parents 7bc5a8e3 8aed02b9
......@@ -4,11 +4,10 @@ on:
pull_request:
types: [synchronize, opened, reopened]
paths:
- 'applications/Chat/coati/**'
- 'applications/Chat/requirements.txt'
- 'applications/Chat/setup.py'
- 'applications/Chat/examples/**'
- "applications/Chat/coati/**"
- "applications/Chat/requirements.txt"
- "applications/Chat/setup.py"
- "applications/Chat/examples/**"
jobs:
tests:
......@@ -20,7 +19,7 @@ jobs:
runs-on: [self-hosted, gpu]
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
options: --gpus all --rm -v /data/scratch/github_actions/chat:/data/scratch/github_actions/chat
options: --gpus all --rm -v /data/scratch/github_actions/chat:/data/scratch/github_actions/chat --shm-size=10.24gb
timeout-minutes: 30
defaults:
run:
......@@ -29,28 +28,26 @@ jobs:
- name: Checkout ColossalAI
uses: actions/checkout@v2
- name: Install ColossalAI and ChatGPT
- name: Install ChatGPT
run: |
pip install -e .
cd applications/Chat
pip install -v .
pip install -r examples/requirements.txt
- name: Install Transformers
run: |
cd applications/Chat
git clone https://github.com/hpcaitech/transformers
cd transformers
pip install -v .
pip install transformers==4.30.2
- name: Execute Examples
run: |
cd applications/Chat
rm -rf ~/.cache/colossalai
./examples/test_ci.sh
./tests/test_inference.sh
./tests/test_benchmarks.sh
./tests/test_train.sh
env:
NCCL_SHM_DISABLE: 1
MAX_JOBS: 8
SFT_DATASET: /data/scratch/github_actions/chat/data.json
PROMPT_PATH: /data/scratch/github_actions/chat/prompts_en.jsonl
PROMPT_DATASET: /data/scratch/github_actions/chat/prompts_en.jsonl
PRETRAIN_DATASET: /data/scratch/github_actions/chat/alpaca_data.json
......@@ -30,9 +30,8 @@ jobs:
- name: Checkout ColossalAI
uses: actions/checkout@v2
- name: Install ColossalAI and ChatGPT
- name: Install ChatGPT
run: |
pip install -e .
cd applications/Chat
pip install -v .
pip install -r requirements-test.txt
......
......@@ -22,13 +22,13 @@ def compare_dirs(dir1, dir2):
# If the corresponding item doesn't exist in the second directory, the directories are different
if not os.path.exists(item_path2):
print(f'Found mismatch: {item_path1}, {item_path2}')
print(f"Found mismatch: {item_path1}, {item_path2}")
return False
# If the corresponding item is a directory, we compare the two directories recursively
if os.path.isdir(item_path1) and os.path.isdir(item_path2):
if not compare_dirs(item_path1, item_path2):
print(f'Found mismatch: {item_path1}, {item_path2}')
print(f"Found mismatch: {item_path1}, {item_path2}")
return False
# both are files
......@@ -37,16 +37,16 @@ def compare_dirs(dir1, dir2):
# If the corresponding item is not a file or a directory, the directories are different
else:
print(f'Found mismatch: {item_path1}, {item_path2}')
print(f"Found mismatch: {item_path1}, {item_path2}")
return False
# If all items are the same, the directories are the same
return True
if __name__ == '__main__':
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('-d', '--directory', help="The directory where the multi-language source files are kept.")
parser.add_argument("-d", "--directory", help="The directory where the multi-language source files are kept.")
args = parser.parse_args()
i18n_folders = os.listdir(args.directory)
......@@ -56,7 +56,7 @@ if __name__ == '__main__':
for i in range(1, len(i18n_folders)):
dir1 = i18n_folders[0]
dir2 = i18n_folders[i]
print(f'comparing {dir1} vs {dir2}')
print(f"comparing {dir1} vs {dir2}")
match = compare_dirs(i18n_folders[0], i18n_folders[i])
if not match:
......
......@@ -4,7 +4,7 @@ import os
def check_inputs(input_list):
for path in input_list:
real_path = os.path.join('examples', path)
real_path = os.path.join("examples", path)
if not os.path.exists(real_path):
return False
return True
......@@ -12,16 +12,16 @@ def check_inputs(input_list):
def main():
parser = argparse.ArgumentParser()
parser.add_argument('-f', '--fileNameList', type=str, help="List of file names")
parser.add_argument("-f", "--fileNameList", type=str, help="List of file names")
args = parser.parse_args()
name_list = args.fileNameList.split(",")
is_correct = check_inputs(name_list)
if is_correct:
print('success')
print("success")
else:
print('failure')
print("failure")
if __name__ == '__main__':
if __name__ == "__main__":
main()
......@@ -17,21 +17,21 @@ def show_files(path, all_files):
def join(input_list, sep=None):
return (sep or ' ').join(input_list)
return (sep or " ").join(input_list)
def main():
contents = show_files('examples/', [])
contents = show_files("examples/", [])
all_loc = []
for file_loc in contents:
split_loc = file_loc.split('/')
split_loc = file_loc.split("/")
# must have two sub-folder levels under the examples folder; e.g. examples/images/vit is acceptable, while examples/images/README.md and examples/requirements.txt are not.
if len(split_loc) >= 4:
re_loc = '/'.join(split_loc[1:3])
re_loc = "/".join(split_loc[1:3])
if re_loc not in all_loc:
all_loc.append(re_loc)
print(all_loc)
if __name__ == '__main__':
if __name__ == "__main__":
main()
......@@ -3,7 +3,7 @@ import argparse
def main():
parser = argparse.ArgumentParser()
parser.add_argument('-f', '--fileNameList', type=str, help="The list of changed files")
parser.add_argument("-f", "--fileNameList", type=str, help="The list of changed files")
args = parser.parse_args()
name_list = args.fileNameList.split(":")
folder_need_check = set()
......@@ -15,10 +15,10 @@ def main():
# - application
# - file
if loc.split("/")[0] == "examples" and len(loc.split("/")) >= 4:
folder_need_check.add('/'.join(loc.split("/")[1:3]))
folder_need_check.add("/".join(loc.split("/")[1:3]))
# Output the result using print. Then the shell can get the values.
print(list(folder_need_check))
if __name__ == '__main__':
if __name__ == "__main__":
main()
import os
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import Any, Dict, List
......@@ -10,8 +9,7 @@ import seaborn
from requests_toolbelt import MultipartEncoder
@dataclass
class Contributor:
class Counter(dict):
"""
Dataclass for a github contributor.
......@@ -19,8 +17,40 @@ class Contributor:
name (str): name of the contributor
num_commits_this_week (int): number of commits made within one week
"""
name: str
num_commits_this_week: int
def record(self, item: str):
if item in self:
self[item] += 1
else:
self[item] = 1
def to_sorted_list(self):
data = [(key, value) for key, value in self.items()]
data.sort(key=lambda x: x[1], reverse=True)
return data
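# Illustrative usage of the Counter helper above (names are hypothetical):
#   c = Counter(); c.record("alice"); c.record("alice"); c.record("bob")
#   c.to_sorted_list()  # -> [("alice", 2), ("bob", 1)]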
def get_utc_time_one_week_ago():
"""
Get the UTC time one week ago.
"""
now = datetime.utcnow()
start_datetime = now - timedelta(days=7)
return start_datetime
def datetime2str(dt):
"""
Convert datetime to string in the format of YYYY-MM-DDTHH:MM:SSZ
"""
return dt.strftime("%Y-%m-%dT%H:%M:%SZ")
def str2datetime(string):
"""
Convert string in the format of YYYY-MM-DDTHH:MM:SSZ to datetime
"""
return datetime.strptime(string, "%Y-%m-%dT%H:%M:%SZ")
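# Round-trip example for the two helpers above (illustrative):
#   datetime2str(str2datetime("2023-10-01T12:00:00Z")) == "2023-10-01T12:00:00Z"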
def plot_bar_chart(x: List[Any], y: List[Any], xlabel: str, ylabel: str, title: str, output_path: str) -> None:
......@@ -36,9 +66,30 @@ def plot_bar_chart(x: List[Any], y: List[Any], xlabel: str, ylabel: str, title:
plt.savefig(output_path, dpi=1200)
def get_issue_pull_request_comments(github_token: str, since: str) -> Dict[str, int]:
def get_organization_repositories(github_token, organization_name) -> List[str]:
"""
Retrieve the public repositories under the organization.
"""
url = f"https://api.github.com/orgs/{organization_name}/repos?type=public"
# prepare header
headers = {
"Authorization": f"Bearer {github_token}",
"Accept": "application/vnd.github+json",
"X-GitHub-Api-Version": "2022-11-28",
}
res = requests.get(url, headers=headers).json()
repo_list = []
for item in res:
repo_list.append(item["name"])
return repo_list
def get_issue_pull_request_comments(github_token: str, org_name: str, repo_name: str, since: str) -> Dict[str, int]:
"""
Retrive the issue/PR comments made by our members in the last 7 days.
Retrieve the issue/PR comments made by our members in the last 7 days.
Args:
github_token (str): GitHub access token for API calls
......@@ -46,9 +97,9 @@ def get_issue_pull_request_comments(github_token: str, since: str) -> Dict[str,
"""
# prepare header
headers = {
'Authorization': f'Bearer {github_token}',
'Accept': 'application/vnd.github+json',
'X-GitHub-Api-Version': '2022-11-28'
"Authorization": f"Bearer {github_token}",
"Accept": "application/vnd.github+json",
"X-GitHub-Api-Version": "2022-11-28",
}
user_engagement_count = {}
......@@ -56,28 +107,28 @@ def get_issue_pull_request_comments(github_token: str, since: str) -> Dict[str,
# do pagination to the API
page = 1
while True:
comment_api = f'https://api.github.com/repos/hpcaitech/ColossalAI/issues/comments?since={since}&page={page}'
comment_api = f"https://api.github.com/repos/{org_name}/{repo_name}/issues/comments?since={since}&page={page}"
comment_response = requests.get(comment_api, headers=headers).json()
if len(comment_response) == 0:
break
else:
for item in comment_response:
comment_author_relationship = item['author_association']
if comment_author_relationship != 'MEMBER':
comment_author_relationship = item["author_association"]
if comment_author_relationship != "MEMBER":
# if the comment is not made by our member
# we don't count this comment towards user engagement
continue
issue_id = item['issue_url'].split('/')[-1]
issue_api = f'https://api.github.com/repos/hpcaitech/ColossalAI/issues/{issue_id}'
issue_id = item["issue_url"].split("/")[-1]
issue_api = f"https://api.github.com/repos/{org_name}/{repo_name}/issues/{issue_id}"
issue_response = requests.get(issue_api, headers=headers).json()
issue_author_relationship = issue_response['author_association']
issue_author_relationship = issue_response["author_association"]
if issue_author_relationship != 'MEMBER':
if issue_author_relationship != "MEMBER":
# this means that the issue/PR is not created by our own people
# any comments in this issue/PR by our member will be counted towards the leaderboard
member_name = item['user']['login']
member_name = item["user"]["login"]
if member_name in user_engagement_count:
user_engagement_count[member_name] += 1
......@@ -87,9 +138,9 @@ def get_issue_pull_request_comments(github_token: str, since: str) -> Dict[str,
return user_engagement_count
def get_discussion_comments(github_token, since) -> Dict[str, int]:
def get_discussion_comments(github_token: str, org_name: str, repo_name: str, since: str) -> Dict[str, int]:
"""
Retrive the discussion comments made by our members in the last 7 days.
Retrieve the discussion comments made by our members in the last 7 days.
This is only available via the GitHub GraphQL API.
Args:
......@@ -102,10 +153,10 @@ def get_discussion_comments(github_token, since) -> Dict[str, int]:
if cursor is None:
offset_str = ""
else:
offset_str = f", after: \"{cursor}\""
offset_str = f', after: "{cursor}"'
query = f"""
{{
repository(owner: "hpcaitech", name: "ColossalAI"){{
repository(owner: "{org_name}", name: "{repo_name}"){{
discussions(first: {num} {offset_str}){{
edges {{
cursor
......@@ -131,10 +182,10 @@ def get_discussion_comments(github_token, since) -> Dict[str, int]:
if cursor is None:
offset_str = ""
else:
offset_str = f", before: \"{cursor}\""
offset_str = f', before: "{cursor}"'
query = f"""
{{
repository(owner: "hpcaitech", name: "ColossalAI"){{
repository(owner: "{org_name}", name: "{repo_name}"){{
discussion(number: {discussion_number}){{
title
comments(last: {num} {offset_str}){{
......@@ -169,8 +220,8 @@ def get_discussion_comments(github_token, since) -> Dict[str, int]:
# a utility function to make call to Github GraphQL API
def _call_graphql_api(query):
headers = {"Authorization": f"Bearer {github_token}"}
json_data = {'query': query}
response = requests.post('https://api.github.com/graphql', json=json_data, headers=headers)
json_data = {"query": query}
response = requests.post("https://api.github.com/graphql", json=json_data, headers=headers)
data = response.json()
return data
......@@ -183,21 +234,21 @@ def get_discussion_comments(github_token, since) -> Dict[str, int]:
data = _call_graphql_api(query)
found_discussion_out_of_time_range = False
edges = data['data']['repository']['discussions']['edges']
edges = data["data"]["repository"]["discussions"]["edges"]
if len(edges) == 0:
break
else:
# keep the discussion whose author is not a member
for edge in edges:
# print the discussion title
discussion = edge['node']
discussion = edge["node"]
discussion_updated_at = str2datetime(discussion["updatedAt"])
discussion_updated_at = datetime.strptime(discussion['updatedAt'], "%Y-%m-%dT%H:%M:%SZ")
# check if the updatedAt is within the last 7 days
# if yes, add it to dicussion_numbers
# if yes, add it to discussion_numbers
if discussion_updated_at > since:
if discussion['authorAssociation'] != 'MEMBER':
discussion_numbers.append(discussion['number'])
if discussion["authorAssociation"] != "MEMBER":
discussion_numbers.append(discussion["number"])
else:
found_discussion_out_of_time_range = True
......@@ -205,54 +256,55 @@ def get_discussion_comments(github_token, since) -> Dict[str, int]:
break
else:
# update cursor
cursor = edges[-1]['cursor']
cursor = edges[-1]["cursor"]
# get the dicussion comments and replies made by our member
# get the discussion comments and replies made by our member
user_engagement_count = {}
for dicussion_number in discussion_numbers:
for discussion_number in discussion_numbers:
cursor = None
num_per_request = 10
while True:
query = _generate_comment_reply_count_for_discussion(dicussion_number, num_per_request, cursor)
query = _generate_comment_reply_count_for_discussion(discussion_number, num_per_request, cursor)
data = _call_graphql_api(query)
# get the comments
edges = data['data']['repository']['discussion']['comments']['edges']
edges = data["data"]["repository"]["discussion"]["comments"]["edges"]
# update the cursor
if len(edges) == 0:
break
else:
# update cursor for pagination
cursor = edges[-1]['cursor']
cursor = edges[-1]["cursor"]
for edge in edges:
comment = edge['node']
if comment['authorAssociation'] == 'MEMBER':
comment = edge["node"]
if comment["authorAssociation"] == "MEMBER":
# check if the updatedAt is within the last 7 days
# if yes, add it to user_engagement_count
comment_updated_at = datetime.strptime(comment['updatedAt'], "%Y-%m-%dT%H:%M:%SZ")
comment_updated_at = datetime.strptime(comment["updatedAt"], "%Y-%m-%dT%H:%M:%SZ")
if comment_updated_at > since:
member_name = comment['author']['login']
member_name = comment["author"]["login"]
if member_name in user_engagement_count:
user_engagement_count[member_name] += 1
else:
user_engagement_count[member_name] = 1
# get the replies
reply_edges = comment['replies']['edges']
reply_edges = comment["replies"]["edges"]
if len(reply_edges) == 0:
continue
else:
for reply_edge in reply_edges:
reply = reply_edge['node']
if reply['authorAssociation'] == 'MEMBER':
reply = reply_edge["node"]
if reply["authorAssociation"] == "MEMBER":
# check if the updatedAt is within the last 7 days
# if yes, add it to dicussion_numbers
reply_updated_at = datetime.strptime(reply['updatedAt'], "%Y-%m-%dT%H:%M:%SZ")
# if yes, add it to discussion_numbers
reply_updated_at = datetime.strptime(reply["updatedAt"], "%Y-%m-%dT%H:%M:%SZ")
if reply_updated_at > since:
member_name = reply['author']['login']
member_name = reply["author"]["login"]
if member_name in user_engagement_count:
user_engagement_count[member_name] += 1
else:
......@@ -260,7 +312,9 @@ def get_discussion_comments(github_token, since) -> Dict[str, int]:
return user_engagement_count
def generate_user_engagement_leaderboard_image(github_token: str, output_path: str) -> bool:
def generate_user_engagement_leaderboard_image(
github_token: str, org_name: str, repo_list: List[str], output_path: str
) -> bool:
"""
Generate the user engagement leaderboard image for stats within the last 7 days
......@@ -270,22 +324,31 @@ def generate_user_engagement_leaderboard_image(github_token: str, output_path: s
"""
# request to the Github API to get the users who have replied the most in the last 7 days
now = datetime.utcnow()
start_datetime = now - timedelta(days=7)
start_datetime_str = start_datetime.strftime("%Y-%m-%dT%H:%M:%SZ")
start_datetime = get_utc_time_one_week_ago()
start_datetime_str = datetime2str(start_datetime)
# get the issue/PR comments and discussion comment count
issue_pr_engagement_count = get_issue_pull_request_comments(github_token=github_token, since=start_datetime_str)
discussion_engagement_count = get_discussion_comments(github_token=github_token, since=start_datetime)
total_engagement_count = {}
# update the total engagement count
total_engagement_count.update(issue_pr_engagement_count)
for name, count in discussion_engagement_count.items():
if name in total_engagement_count:
total_engagement_count[name] += count
else:
total_engagement_count[name] = count
def _update_count(counter):
for name, count in counter.items():
if name in total_engagement_count:
total_engagement_count[name] += count
else:
total_engagement_count[name] = count
for repo_name in repo_list:
print(f"Fetching user engagement count for {repo_name}/{repo_name}")
issue_pr_engagement_count = get_issue_pull_request_comments(
github_token=github_token, org_name=org_name, repo_name=repo_name, since=start_datetime_str
)
discussion_engagement_count = get_discussion_comments(
github_token=github_token, org_name=org_name, repo_name=repo_name, since=start_datetime
)
# update the total engagement count
_update_count(issue_pr_engagement_count)
_update_count(discussion_engagement_count)
# prepare the data for plotting
x = []
......@@ -302,20 +365,17 @@ def generate_user_engagement_leaderboard_image(github_token: str, output_path: s
x.append(count)
y.append(name)
# use Shanghai time to display on the image
start_datetime_str = datetime.now(pytz.timezone('Asia/Shanghai')).strftime("%Y-%m-%dT%H:%M:%SZ")
# plot the leaderboard
xlabel = f"Number of Comments made (since {start_datetime_str})"
ylabel = "Member"
title = 'Active User Engagement Leaderboard'
title = "Active User Engagement Leaderboard"
plot_bar_chart(x, y, xlabel=xlabel, ylabel=ylabel, title=title, output_path=output_path)
return True
else:
return False
def generate_contributor_leaderboard_image(github_token, output_path) -> bool:
def generate_contributor_leaderboard_image(github_token, org_name, repo_list, output_path) -> bool:
"""
Generate the contributor leaderboard image for stats within the last 7 days
......@@ -324,54 +384,81 @@ def generate_contributor_leaderboard_image(github_token, output_path) -> bool:
output_path (str): the path to save the image
"""
# request to the Github API to get the users who have contributed in the last 7 days
URL = 'https://api.github.com/repos/hpcaitech/ColossalAI/stats/contributors'
headers = {
'Authorization': f'Bearer {github_token}',
'Accept': 'application/vnd.github+json',
'X-GitHub-Api-Version': '2022-11-28'
"Authorization": f"Bearer {github_token}",
"Accept": "application/vnd.github+json",
"X-GitHub-Api-Version": "2022-11-28",
}
while True:
response = requests.get(URL, headers=headers).json()
counter = Counter()
start_datetime = get_utc_time_one_week_ago()
if len(response) != 0:
# sometimes the Github API returns empty response for unknown reason
# request again if the response is empty
break
def _get_url(org_name, repo_name, page):
return f"https://api.github.com/repos/{org_name}/{repo_name}/pulls?per_page=50&page={page}&state=closed"
contributor_list = []
def _iterate_by_page(org_name, repo_name):
page = 1
stop = False
# get number of commits for each contributor
start_timestamp = None
for item in response:
num_commits_this_week = item['weeks'][-1]['c']
name = item['author']['login']
contributor = Contributor(name=name, num_commits_this_week=num_commits_this_week)
contributor_list.append(contributor)
while not stop:
print(f"Fetching pull request data for {org_name}/{repo_name} - page{page}")
url = _get_url(org_name, repo_name, page)
# update start_timestamp
start_timestamp = item['weeks'][-1]['w']
while True:
response = requests.get(url, headers=headers).json()
if isinstance(response, list):
# sometimes the Github API returns nothing
# request again if the response is not a list
break
print("Empty response, request again...")
if len(response) == 0:
# if the response is empty, stop
stop = True
break
# count the pull request and author from response
for pr_data in response:
merged_at = pr_data["merged_at"]
author = pr_data["user"]["login"]
if merged_at is None:
continue
merge_datetime = str2datetime(merged_at)
if merge_datetime < start_datetime:
# if we found a pull request that is merged before the start_datetime
# we stop
stop = True
break
else:
# record the author
counter.record(author)
# next page
page += 1
for repo_name in repo_list:
_iterate_by_page(org_name, repo_name)
# convert unix timestamp to Beijing datetime
start_datetime = datetime.fromtimestamp(start_timestamp, tz=pytz.timezone('Asia/Shanghai'))
start_datetime_str = start_datetime.strftime("%Y-%m-%dT%H:%M:%SZ")
bj_start_datetime = datetime.fromtimestamp(start_datetime.timestamp(), tz=pytz.timezone("Asia/Shanghai"))
bj_start_datetime_str = datetime2str(bj_start_datetime)
# sort by number of commits
contributor_list.sort(key=lambda x: x.num_commits_this_week, reverse=True)
contribution_list = counter.to_sorted_list()
# remove contributors who has zero commits
contributor_list = [x for x in contributor_list if x.num_commits_this_week > 0]
# prepare the data for plotting
x = [x.num_commits_this_week for x in contributor_list]
y = [x.name for x in contributor_list]
author_list = [x[0] for x in contribution_list]
num_commit_list = [x[1] for x in contribution_list]
# plot
if len(x) > 0:
xlabel = f"Number of Commits (since {start_datetime_str})"
if len(author_list) > 0:
xlabel = f"Number of Pull Requests (since {bj_start_datetime_str})"
ylabel = "Contributor"
title = 'Active Contributor Leaderboard'
plot_bar_chart(x, y, xlabel=xlabel, ylabel=ylabel, title=title, output_path=output_path)
title = "Active Contributor Leaderboard"
plot_bar_chart(num_commit_list, author_list, xlabel=xlabel, ylabel=ylabel, title=title, output_path=output_path)
return True
else:
return False
......@@ -386,14 +473,14 @@ def upload_image_to_lark(lark_tenant_token: str, image_path: str) -> str:
image_path (str): the path to the image to be uploaded
"""
url = "https://open.feishu.cn/open-apis/im/v1/images"
form = {'image_type': 'message', 'image': (open(image_path, 'rb'))} # 需要替换具体的path
form = {"image_type": "message", "image": (open(image_path, "rb"))} # 需要替换具体的path
multi_form = MultipartEncoder(form)
headers = {
'Authorization': f'Bearer {lark_tenant_token}', ## 获取tenant_access_token, 需要替换为实际的token
"Authorization": f"Bearer {lark_tenant_token}", ## 获取tenant_access_token, 需要替换为实际的token
}
headers['Content-Type'] = multi_form.content_type
headers["Content-Type"] = multi_form.content_type
response = requests.request("POST", url, headers=headers, data=multi_form).json()
return response['data']['image_key']
return response["data"]["image_key"]
def generate_lark_tenant_access_token(app_id: str, app_secret: str) -> str:
......@@ -404,10 +491,10 @@ def generate_lark_tenant_access_token(app_id: str, app_secret: str) -> str:
app_id (str): Lark app id
app_secret (str): Lark app secret
"""
url = 'https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal'
data = {'app_id': app_id, 'app_secret': app_secret}
url = "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal"
data = {"app_id": app_id, "app_secret": app_secret}
response = requests.post(url, json=data).json()
return response['tenant_access_token']
return response["tenant_access_token"]
def send_image_to_lark(image_key: str, webhook_url: str) -> None:
......@@ -434,31 +521,37 @@ def send_message_to_lark(message: str, webhook_url: str):
requests.post(webhook_url, json=data)
if __name__ == '__main__':
GITHUB_TOKEN = os.environ['GITHUB_TOKEN']
CONTRIBUTOR_IMAGE_PATH = 'contributor_leaderboard.png'
USER_ENGAGEMENT_IMAGE_PATH = 'engagement_leaderboard.png'
if __name__ == "__main__":
GITHUB_TOKEN = os.environ["GITHUB_TOKEN"]
CONTRIBUTOR_IMAGE_PATH = "contributor_leaderboard.png"
USER_ENGAGEMENT_IMAGE_PATH = "engagement_leaderboard.png"
ORG_NAME = "hpcaitech"
# get all open source repositories
REPO_LIST = get_organization_repositories(GITHUB_TOKEN, ORG_NAME)
# generate images
contrib_success = generate_contributor_leaderboard_image(GITHUB_TOKEN, CONTRIBUTOR_IMAGE_PATH)
engagement_success = generate_user_engagement_leaderboard_image(GITHUB_TOKEN, USER_ENGAGEMENT_IMAGE_PATH)
contrib_success = generate_contributor_leaderboard_image(GITHUB_TOKEN, ORG_NAME, REPO_LIST, CONTRIBUTOR_IMAGE_PATH)
engagement_success = generate_user_engagement_leaderboard_image(
GITHUB_TOKEN, ORG_NAME, REPO_LIST, USER_ENGAGEMENT_IMAGE_PATH
)
# upload images
APP_ID = os.environ['LARK_APP_ID']
APP_SECRET = os.environ['LARK_APP_SECRET']
APP_ID = os.environ["LARK_APP_ID"]
APP_SECRET = os.environ["LARK_APP_SECRET"]
LARK_TENANT_TOKEN = generate_lark_tenant_access_token(app_id=APP_ID, app_secret=APP_SECRET)
contributor_image_key = upload_image_to_lark(LARK_TENANT_TOKEN, CONTRIBUTOR_IMAGE_PATH)
user_engagement_image_key = upload_image_to_lark(LARK_TENANT_TOKEN, USER_ENGAGEMENT_IMAGE_PATH)
# send message to lark
LARK_WEBHOOK_URL = os.environ['LARK_WEBHOOK_URL']
LARK_WEBHOOK_URL = os.environ["LARK_WEBHOOK_URL"]
message = """本周的社区榜单出炉啦!
1. 开发贡献者榜单
2. 用户互动榜单
注:
- 开发贡献者测评标准为:本周由公司成员提交的commit次数
- 用户互动榜单测评标准为:本周由公司成员在非成员创建的issue/PR/discussion中回复的次数
- 开发贡献者测评标准为:本周由公司成员与社区在所有开源仓库提交的Pull Request次数
- 用户互动榜单测评标准为:本周由公司成员在非成员在所有开源仓库创建的issue/PR/discussion中回复的次数
"""
send_message_to_lark(message, LARK_WEBHOOK_URL)
......@@ -467,7 +560,7 @@ if __name__ == '__main__':
if contrib_success:
send_image_to_lark(contributor_image_key, LARK_WEBHOOK_URL)
else:
send_message_to_lark("本周没有成员贡献commit,无榜单图片生成。", LARK_WEBHOOK_URL)
send_message_to_lark("本周没有成员贡献PR,无榜单图片生成。", LARK_WEBHOOK_URL)
# send user engagement image to lark
if engagement_success:
......
......@@ -7,27 +7,27 @@ import re
import requests
COMMIT_API = 'https://api.github.com/repos/hpcaitech/ColossalAI/commits'
TAGS_API = 'https://api.github.com/repos/hpcaitech/ColossalAI/tags'
COMMIT_API = "https://api.github.com/repos/hpcaitech/ColossalAI/commits"
TAGS_API = "https://api.github.com/repos/hpcaitech/ColossalAI/tags"
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('--out', type=str, help='output path for the release draft', required=True)
parser.add_argument('--version', type=str, help='current version to release', required=True)
parser.add_argument("--out", type=str, help="output path for the release draft", required=True)
parser.add_argument("--version", type=str, help="current version to release", required=True)
return parser.parse_args()
def get_latest_tag_commit(headers=None):
res = requests.get(url=TAGS_API, headers=headers)
data = res.json()
commit_hash = data[0]['commit']['sha']
version = data[0]['name']
commit_hash = data[0]["commit"]["sha"]
version = data[0]["name"]
return commit_hash, version
def get_commit_info(commit_hash, headers=None):
api = f'{COMMIT_API}/{commit_hash}'
api = f"{COMMIT_API}/{commit_hash}"
res = requests.get(url=api, headers=headers)
return res.json()
......@@ -37,7 +37,7 @@ def get_all_commit_info(since, headers=None):
results = []
while True:
api = f'{COMMIT_API}?since={since}&per_page=100&page={page}'
api = f"{COMMIT_API}?since={since}&per_page=100&page={page}"
resp = requests.get(url=api, headers=headers)
data = resp.json()
......@@ -53,21 +53,21 @@ def get_all_commit_info(since, headers=None):
def collate_release_info(commit_info_list):
results = dict()
pattern = pattern = r'\[.*\]'
pattern = r"\[.*\]"
for commit_info in commit_info_list:
author = commit_info['commit']['author']['name']
author = commit_info["commit"]["author"]["name"]
try:
author_url = commit_info['author']['url']
author_url = commit_info["author"]["url"]
except:
# author can be None
author_url = None
msg = commit_info['commit']['message']
msg = commit_info["commit"]["message"]
match = re.search(pattern, msg)
if match:
tag = match.group().lstrip('[').rstrip(']').capitalize()
tag = match.group().lstrip("[").rstrip("]").capitalize()
if tag not in results:
results[tag] = []
results[tag].append((msg, author, author_url))
......@@ -89,42 +89,43 @@ def generate_release_post_markdown(current_version, last_version, release_info):
for msg, author, author_url in v:
# only keep the first line
msg = msg.split('\n')[0]
msg = msg.split("\n")[0]
if author_url:
item = f'{msg} by [{author}]({author_url})\n'
item = f"{msg} by [{author}]({author_url})\n"
else:
item = f'{msg} by {author}\n'
text.append(f'- {item}')
item = f"{msg} by {author}\n"
text.append(f"- {item}")
text.append('\n')
text.append("\n")
# add full change log
text.append(
f'**Full Changelog**: https://github.com/hpcaitech/ColossalAI/compare/{current_version}...{last_version}')
f"**Full Changelog**: https://github.com/hpcaitech/ColossalAI/compare/{current_version}...{last_version}"
)
return text
if __name__ == '__main__':
if __name__ == "__main__":
args = parse_args()
token = os.environ['GITHUB_API_TOKEN']
headers = {'Authorization': token}
token = os.environ["GITHUB_API_TOKEN"]
headers = {"Authorization": token}
# get previous release tag
last_release_commit, last_version = get_latest_tag_commit(headers)
last_release_commit_info = get_commit_info(last_release_commit, headers=headers)
last_release_date = last_release_commit_info['commit']['author']['date']
last_release_date = last_release_commit_info["commit"]["author"]["date"]
# get the commits since last release
commit_info = get_all_commit_info(since=last_release_date, headers=headers)
commit_info = commit_info[:-1] # remove the release commit
commit_info = commit_info[:-1] # remove the release commit
# collate into markdown
release_info = collate_release_info(commit_info)
markdown_text = generate_release_post_markdown(args.version, last_version, release_info)
# write into a file
with open(args.out, 'w') as f:
with open(args.out, "w") as f:
for line in markdown_text:
f.write(line)
......@@ -5,8 +5,8 @@ import requests
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('-m', '--message', type=str)
parser.add_argument('-u', '--url', type=str)
parser.add_argument("-m", "--message", type=str)
parser.add_argument("-u", "--url", type=str)
return parser.parse_args()
......@@ -15,6 +15,6 @@ def send_message_to_lark(message, webhook_url):
requests.post(webhook_url, json=data)
if __name__ == '__main__':
if __name__ == "__main__":
args = parse_args()
send_message_to_lark(args.message, args.url)
......@@ -155,3 +155,7 @@ colossalai/version.py
# ignore coverage test file
coverage.lcov
coverage.xml
# ignore testmon and coverage files
.coverage
.testmondata*
......@@ -3,3 +3,5 @@ line_length = 120
multi_line_output=3
include_trailing_comma = true
ignore_comments = true
profile = black
honor_noqa = true
repos:
- repo: https://github.com/PyCQA/autoflake
rev: v2.2.1
hooks:
- id: autoflake
name: autoflake (python)
args: ['--in-place', '--remove-unused-variables', '--remove-all-unused-imports', '--ignore-init-module-imports']
- repo: https://github.com/pycqa/isort
rev: 5.12.0
hooks:
- id: isort
name: sort all imports (python)
- repo: https://github.com/pre-commit/mirrors-yapf
rev: v0.32.0
- repo: https://github.com/psf/black-pre-commit-mirror
rev: 23.9.1
hooks:
- id: yapf
name: yapf formatter
args: ['--style=.style.yapf', '--parallel', '--in-place']
- id: black
name: black formatter
args: ['--line-length=120', '--target-version=py37', '--target-version=py38', '--target-version=py39','--target-version=py310']
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v13.0.1
hooks:
- id: clang-format
name: clang formatter
types_or: [c++, c]
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
......
[style]
based_on_style = google
spaces_before_comment = 4
split_before_logical_operator = true
column_limit = 120
......@@ -30,6 +30,12 @@ pip install <options> -e .
### Unit Tests
We use [PyTest](https://docs.pytest.org/en/latest/) to execute tests. You can install pytest with `pip install pytest`. As some of the tests require initialization of the distributed backend, GPUs are needed to execute them.
To set up the environment for unit testing, first change your current directory to the root directory of your local ColossalAI repository, then run
```bash
pip install -r requirements/requirements-test.txt
```
If you encounter an error saying "Could not find a version that satisfies the requirement fbgemm-gpu==0.2.0", please downgrade your Python version to 3.8 or 3.9 and try again.
If you only want to run CPU tests, you can run
```bash
......@@ -138,4 +144,4 @@ You can now create a pull request on the GitHub webpage of your repository. The
Write a clear description of your pull request and [link the pull request to your target issue](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue). This will automatically close the issue when the pull request is approved.
In case of code conflict, you should rebase your branch and resolve the conflicts manually.
\ No newline at end of file
In case of code conflict, you should rebase your branch and resolve the conflicts manually.
......@@ -396,3 +396,84 @@ Copyright 2021- HPC-AI Technology Inc. All rights reserved.
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
---------------- LICENSE FOR VLLM TEAM ----------------
from VLLM TEAM:
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://github.com/vllm-project/vllm/blob/main/LICENSE
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
---------------- LICENSE FOR LIGHTLLM TEAM ----------------
from LIGHTLLM TEAM:
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://github.com/ModelTC/lightllm/blob/main/LICENSE
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
---------------- LICENSE FOR AutoGPTQ ----------------
From AutoGPTQ:
MIT License
Copyright (c) 2023 潘其威(William)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
---------------- LICENSE FOR exllama ----------------
From exllama:
MIT License
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
......@@ -16,7 +16,7 @@
[![Documentation](https://readthedocs.org/projects/colossalai/badge/?version=latest)](https://colossalai.readthedocs.io/en/latest/?badge=latest)
[![CodeFactor](https://www.codefactor.io/repository/github/hpcaitech/colossalai/badge)](https://www.codefactor.io/repository/github/hpcaitech/colossalai)
[![HuggingFace badge](https://img.shields.io/badge/%F0%9F%A4%97HuggingFace-Join-yellow)](https://huggingface.co/hpcai-tech)
[![slack badge](https://img.shields.io/badge/Slack-join-blueviolet?logo=slack&amp)](https://join.slack.com/t/colossalaiworkspace/shared_invite/zt-z7b26eeb-CBp7jouvu~r0~lcFzX832w)
[![slack badge](https://img.shields.io/badge/Slack-join-blueviolet?logo=slack&amp)](https://github.com/hpcaitech/public_assets/tree/main/colossalai/contact/slack)
[![WeChat badge](https://img.shields.io/badge/微信-加入-green?logo=wechat&amp)](https://raw.githubusercontent.com/hpcaitech/public_assets/main/colossalai/img/WeChat.png)
......@@ -25,14 +25,15 @@
</div>
## Latest News
* [2023/09] [One Half-Day of Training Using a Few Hundred Dollars Yields Similar Results to Mainstream Large Models, Open-Source and Commercial-Free Domain-Specific Llm Solution](https://www.hpc-ai.tech/blog/one-half-day-of-training-using-a-few-hundred-dollars-yields-similar-results-to-mainstream-large-models-open-source-and-commercial-free-domain-specific-llm-solution)
* [2023/09] [70 Billion Parameter LLaMA2 Model Training Accelerated by 195%](https://www.hpc-ai.tech/blog/70b-llama2-training)
* [2023/07] [HPC-AI Tech Raises 22 Million USD in Series A Funding](https://www.hpc-ai.tech/blog/hpc-ai-tech-raises-22-million-usd-in-series-a-funding-to-fuel-team-expansion-and-business-growth)
* [2023/07] [65B Model Pretraining Accelerated by 38%, Best Practices for Building LLaMA-Like Base Models Open-Source](https://www.hpc-ai.tech/blog/large-model-pretraining)
* [2023/03] [ColossalChat: An Open-Source Solution for Cloning ChatGPT With a Complete RLHF Pipeline](https://medium.com/@yangyou_berkeley/colossalchat-an-open-source-solution-for-cloning-chatgpt-with-a-complete-rlhf-pipeline-5edf08fb538b)
* [2023/03] [Intel and Colossal-AI Partner to Deliver Cost-Efficient Open-Source Solution for Protein Folding Structure Prediction](https://www.hpc-ai.tech/blog/intel-habana)
* [2023/03] [AWS and Google Fund Colossal-AI with Startup Cloud Programs](https://www.hpc-ai.tech/blog/aws-and-google-fund-colossal-ai-with-startup-cloud-programs)
* [2023/02] [Open Source Solution Replicates ChatGPT Training Process! Ready to go with only 1.6GB GPU Memory](https://www.hpc-ai.tech/blog/colossal-ai-chatgpt)
* [2023/01] [Hardware Savings Up to 46 Times for AIGC and Automatic Parallelism](https://medium.com/pytorch/latest-colossal-ai-boasts-novel-automatic-parallelism-and-offers-savings-up-to-46x-for-stable-1453b48f3f02)
* [2022/11] [Diffusion Pretraining and Hardware Fine-Tuning Can Be Almost 7X Cheaper](https://www.hpc-ai.tech/blog/diffusion-pretraining-and-hardware-fine-tuning-can-be-almost-7x-cheaper)
* [2022/10] [Use a Laptop to Analyze 90% of Proteins, With a Single-GPU Inference Sequence Exceeding 10,000](https://www.hpc-ai.tech/blog/use-a-laptop-to-analyze-90-of-proteins-with-a-single-gpu-inference-sequence-exceeding)
* [2022/09] [HPC-AI Tech Completes $6 Million Seed and Angel Round Fundraising](https://www.hpc-ai.tech/blog/hpc-ai-tech-completes-6-million-seed-and-angel-round-fundraising-led-by-bluerun-ventures-in-the)
## Table of Contents
<ul>
......@@ -41,6 +42,7 @@
<li>
<a href="#Colossal-AI-in-the-Real-World">Colossal-AI for Real World Applications</a>
<ul>
<li><a href="#Colossal-LLaMA-2">Colossal-LLaMA-2: One Half-Day of Training Using a Few Hundred Dollars Yields Similar Results to Mainstream Large Models, Open-Source and Commercial-Free Domain-Specific Llm Solution</a></li>
<li><a href="#ColossalChat">ColossalChat: An Open-Source Solution for Cloning ChatGPT With a Complete RLHF Pipeline</a></li>
<li><a href="#AIGC">AIGC: Acceleration of Stable Diffusion</a></li>
<li><a href="#Biomedicine">Biomedicine: Acceleration of AlphaFold Protein Structure</a></li>
......@@ -49,6 +51,7 @@
<li>
<a href="#Parallel-Training-Demo">Parallel Training Demo</a>
<ul>
<li><a href="#LLaMA2">LLaMA 1/2</a></li>
<li><a href="#GPT-3">GPT-3</a></li>
<li><a href="#GPT-2">GPT-2</a></li>
<li><a href="#BERT">BERT</a></li>
......@@ -124,15 +127,55 @@ distributed training and inference in a few lines.
## Colossal-AI in the Real World
### Colossal-LLaMA-2
- One half-day of training using a few hundred dollars yields similar results to mainstream large models, open-source and commercial-free domain-specific LLM solution.
[[code]](https://github.com/hpcaitech/ColossalAI/tree/main/applications/Colossal-LLaMA-2)
[[blog]](https://www.hpc-ai.tech/blog/one-half-day-of-training-using-a-few-hundred-dollars-yields-similar-results-to-mainstream-large-models-open-source-and-commercial-free-domain-specific-llm-solution)
[[model weights]](https://huggingface.co/hpcai-tech/Colossal-LLaMA-2-7b-base)
| | Backbone | Tokens Consumed | | MMLU | CMMLU | AGIEval | GAOKAO | CEval |
| :----------------------------: | :--------: | :-------------: | :------------------: | :-----------: | :-----: | :----: | :----: | :------------------------------: |
| | | - | | 5-shot | 5-shot | 5-shot | 0-shot | 5-shot |
| Baichuan-7B | - | 1.2T | | 42.32 (42.30) | 44.53 (44.02) | 38.72 | 36.74 | 42.80 |
| Baichuan-13B-Base | - | 1.4T | | 50.51 (51.60) | 55.73 (55.30) | 47.20 | 51.41 | 53.60 |
| Baichuan2-7B-Base | - | 2.6T | | 46.97 (54.16) | 57.67 (57.07) | 45.76 | 52.60 | 54.00 |
| Baichuan2-13B-Base | - | 2.6T | | 54.84 (59.17) | 62.62 (61.97) | 52.08 | 58.25 | 58.10 |
| ChatGLM-6B | - | 1.0T | | 39.67 (40.63) | 41.17 (-) | 40.10 | 36.53 | 38.90 |
| ChatGLM2-6B | - | 1.4T | | 44.74 (45.46) | 49.40 (-) | 46.36 | 45.49 | 51.70 |
| InternLM-7B | - | 1.6T | | 46.70 (51.00) | 52.00 (-) | 44.77 | 61.64 | 52.80 |
| Qwen-7B | - | 2.2T | | 54.29 (56.70) | 56.03 (58.80) | 52.47 | 56.42 | 59.60 |
| | | | | | | | | |
| Llama-2-7B | - | 2.0T | | 44.47 (45.30) | 32.97 (-) | 32.60 | 25.46 | - |
| Linly-AI/Chinese-LLaMA-2-7B-hf | Llama-2-7B | 1.0T | | 37.43 | 29.92 | 32.00 | 27.57 | - |
| wenge-research/yayi-7b-llama2 | Llama-2-7B | - | | 38.56 | 31.52 | 30.99 | 25.95 | - |
| ziqingyang/chinese-llama-2-7b | Llama-2-7B | - | | 33.86 | 34.69 | 34.52 | 25.18 | 34.2 |
| TigerResearch/tigerbot-7b-base | Llama-2-7B | 0.3T | | 43.73 | 42.04 | 37.64 | 30.61 | - |
| LinkSoul/Chinese-Llama-2-7b | Llama-2-7B | - | | 48.41 | 38.31 | 38.45 | 27.72 | - |
| FlagAlpha/Atom-7B | Llama-2-7B | 0.1T | | 49.96 | 41.10 | 39.83 | 33.00 | - |
| IDEA-CCNL/Ziya-LLaMA-13B-v1.1 | Llama-13B | 0.11T | | 50.25 | 40.99 | 40.04 | 30.54 | - |
| | | | | | | | | |
| **Colossal-LLaMA-2-7b-base** | Llama-2-7B | **0.0085T** | | 53.06 | 49.89 | 51.48 | 58.82 | 50.2 |
### ColossalChat
<div align="center">
<a href="https://chat.colossalai.org/">
<img src="https://raw.githubusercontent.com/hpcaitech/public_assets/main/colossalai/img/Chat-demo.png" width="700" />
<a href="https://www.youtube.com/watch?v=HcTiHzApHm0">
<img src="https://raw.githubusercontent.com/hpcaitech/public_assets/main/applications/chat/ColossalChat%20YouTube.png" width="700" />
</a>
</div>
[ColossalChat](https://github.com/hpcaitech/ColossalAI/tree/main/applications/Chat): An open-source solution for cloning [ChatGPT](https://openai.com/blog/chatgpt/) with a complete RLHF pipeline. [[code]](https://github.com/hpcaitech/ColossalAI/tree/main/applications/Chat) [[blog]](https://medium.com/@yangyou_berkeley/colossalchat-an-open-source-solution-for-cloning-chatgpt-with-a-complete-rlhf-pipeline-5edf08fb538b) [[demo]](https://chat.colossalai.org)
[ColossalChat](https://github.com/hpcaitech/ColossalAI/tree/main/applications/Chat): An open-source solution for cloning [ChatGPT](https://openai.com/blog/chatgpt/) with a complete RLHF pipeline.
[[code]](https://github.com/hpcaitech/ColossalAI/tree/main/applications/Chat)
[[blog]](https://medium.com/@yangyou_berkeley/colossalchat-an-open-source-solution-for-cloning-chatgpt-with-a-complete-rlhf-pipeline-5edf08fb538b)
[[demo]](https://www.youtube.com/watch?v=HcTiHzApHm0)
[[tutorial]](https://www.youtube.com/watch?v=-qFBZFmOJfg)
<p id="ColossalChat-Speed" align="center">
<img src="https://raw.githubusercontent.com/hpcaitech/public_assets/main/applications/chat/ColossalChat%20Speed.jpg" width=450/>
</p>
- Up to 10 times faster for RLHF PPO Stage3 Training
<p id="ColossalChat_scaling" align="center">
<img src="https://raw.githubusercontent.com/hpcaitech/public_assets/main/applications/chatgpt/ChatGPT%20scaling.png" width=800/>
......@@ -205,6 +248,23 @@ Acceleration of [AlphaFold Protein Structure](https://alphafold.ebi.ac.uk/)
<p align="right">(<a href="#top">back to top</a>)</p>
## Parallel Training Demo
### LLaMA2
<p align="center">
<img src="https://raw.githubusercontent.com/hpcaitech/public_assets/main/colossalai/img/llama2_pretraining.png" width=600/>
</p>
- 70 billion parameter LLaMA2 model training accelerated by 195%
[[code]](https://github.com/hpcaitech/ColossalAI/tree/main/examples/language/llama2)
[[blog]](https://www.hpc-ai.tech/blog/70b-llama2-training)
### LLaMA1
<p align="center">
<img src="https://raw.githubusercontent.com/hpcaitech/public_assets/main/examples/images/LLaMA_pretraining.png" width=600/>
</p>
- 65-billion-parameter large model pretraining accelerated by 38%
[[code]](https://github.com/hpcaitech/ColossalAI/tree/example/llama/examples/language/llama)
[[blog]](https://www.hpc-ai.tech/blog/large-model-pretraining)
### GPT-3
<p align="center">
......@@ -352,6 +412,22 @@ If you want to install and enable CUDA kernel fusion (compulsory installation wh
CUDA_EXT=1 pip install .
```
For users with CUDA 10.2, you can still build ColossalAI from source. However, you need to manually download the cub library and copy it to the corresponding directory.
```bash
# clone the repository
git clone https://github.com/hpcaitech/ColossalAI.git
cd ColossalAI
# download the cub library
wget https://github.com/NVIDIA/cub/archive/refs/tags/1.8.0.zip
unzip 1.8.0.zip
cp -r cub-1.8.0/cub/ colossalai/kernel/cuda_native/csrc/kernels/include/
# install
CUDA_EXT=1 pip install .
```
<p align="right">(<a href="#top">back to top</a>)</p>
## Use Docker
......@@ -426,6 +502,7 @@ To cite this project, you can use the following BibTeX citation.
}
```
Colossal-AI has been accepted as official tutorial by top conferences [SC](https://sc22.supercomputing.org/), [AAAI](https://aaai.org/Conferences/AAAI-23/), [PPoPP](https://ppopp23.sigplan.org/), [CVPR](https://cvpr2023.thecvf.com/), [ISC](https://www.isc-hpc.com/), etc.
Colossal-AI has been accepted as an official tutorial by top conferences [NeurIPS](https://nips.cc/), [SC](https://sc22.supercomputing.org/), [AAAI](https://aaai.org/Conferences/AAAI-23/),
[PPoPP](https://ppopp23.sigplan.org/), [CVPR](https://cvpr2023.thecvf.com/), [ISC](https://www.isc-hpc.com/), [NVIDIA GTC](https://www.nvidia.com/en-us/on-demand/session/gtcspring23-S51482/), etc.
<p align="right">(<a href="#top">back to top</a>)</p>
......@@ -145,4 +145,4 @@ docs/.build
# wandb log
example/wandb/
examples/awesome-chatgpt-prompts/
\ No newline at end of file
examples/awesome-chatgpt-prompts/
......@@ -4,7 +4,6 @@
<span>ColossalChat</span>
</h1>
## Table of Contents
- [Table of Contents](#table-of-contents)
......@@ -34,7 +33,9 @@
- [Authors](#authors)
- [Citations](#citations)
- [Licenses](#licenses)
---
## What is ColossalChat and Coati?
[ColossalChat](https://github.com/hpcaitech/ColossalAI/tree/main/applications/Chat) is a project to implement LLMs with RLHF, powered by the [Colossal-AI](https://github.com/hpcaitech/ColossalAI) project.
......@@ -42,6 +43,7 @@
Coati stands for `ColossalAI Talking Intelligence`. It is the name for the module implemented in this project and is also the name of the large language model developed by the ColossalChat project.
The Coati package provides a unified large language model framework that implements the following functions:
- Supports comprehensive large-model training acceleration capabilities for ColossalAI, without requiring knowledge of complex distributed training algorithms
- Supervised datasets collection
- Supervised instructions fine-tuning
......@@ -56,29 +58,42 @@ The Coati package provides a unified large language model framework that has imp
<img src="https://raw.githubusercontent.com/hpcaitech/public_assets/main/applications/chatgpt/chatgpt.png" width=700/>
</p>
Image source: https://openai.com/blog/chatgpt
Image source: https://openai.com/blog/chatgpt
</div>
**As Colossal-AI is undergoing some major updates, this project will be actively maintained to stay in line with the Colossal-AI project.**
More details can be found in the latest news.
* [2023/03] [ColossalChat: An Open-Source Solution for Cloning ChatGPT With a Complete RLHF Pipeline](https://medium.com/@yangyou_berkeley/colossalchat-an-open-source-solution-for-cloning-chatgpt-with-a-complete-rlhf-pipeline-5edf08fb538b)
* [2023/02] [Open Source Solution Replicates ChatGPT Training Process! Ready to go with only 1.6GB GPU Memory](https://www.hpc-ai.tech/blog/colossal-ai-chatgpt)
- [2023/03] [ColossalChat: An Open-Source Solution for Cloning ChatGPT With a Complete RLHF Pipeline](https://medium.com/@yangyou_berkeley/colossalchat-an-open-source-solution-for-cloning-chatgpt-with-a-complete-rlhf-pipeline-5edf08fb538b)
- [2023/02] [Open Source Solution Replicates ChatGPT Training Process! Ready to go with only 1.6GB GPU Memory](https://www.hpc-ai.tech/blog/colossal-ai-chatgpt)
## Online demo
You can experience the performance of Coati7B on this page.
[chat.colossalai.org](https://chat.colossalai.org/)
<div align="center">
<a href="https://www.youtube.com/watch?v=HcTiHzApHm0">
<img src="https://raw.githubusercontent.com/hpcaitech/public_assets/main/applications/chat/ColossalChat%20YouTube.png" width="700" />
</a>
</div>
[ColossalChat](https://github.com/hpcaitech/ColossalAI/tree/main/applications/Chat): An open-source solution for cloning [ChatGPT](https://openai.com/blog/chatgpt/) with a complete RLHF pipeline.
[[code]](https://github.com/hpcaitech/ColossalAI/tree/main/applications/Chat)
[[blog]](https://medium.com/@yangyou_berkeley/colossalchat-an-open-source-solution-for-cloning-chatgpt-with-a-complete-rlhf-pipeline-5edf08fb538b)
[[demo]](https://www.youtube.com/watch?v=HcTiHzApHm0)
[[tutorial]](https://www.youtube.com/watch?v=-qFBZFmOJfg)
Due to resource constraints, we will only provide this service from 29th Mar 2023 to 5 April 2023. However, we have provided the inference code in the [inference](./inference/) folder. The WebUI will be open-sourced soon as well.
<p id="ColossalChat-Speed" align="center">
<img src="https://raw.githubusercontent.com/hpcaitech/public_assets/main/applications/chat/ColossalChat%20Speed.jpg" width=450/>
</p>
> DeepSpeedChat performance figures come from its blog post of April 12, 2023. ColossalChat performance can be reproduced on an AWS p4d.24xlarge node with 8 A100-40G GPUs using the following command: `torchrun --standalone --nproc_per_node 8 benchmark_opt_lora_dummy.py --num_collect_steps 1 --use_kernels --strategy colossalai_zero2 --experience_batch_size 64 --train_batch_size 32`
> Warning: Due to model and dataset size limitations, Coati is just a baby model; Coati7B may output incorrect information and lacks the ability for multi-turn dialogue. There is still significant room for improvement.
## Install
### Install the environment
```shell
```bash
conda create -n coati
conda activate coati
git clone https://github.com/hpcaitech/ColossalAI.git
......@@ -87,22 +102,20 @@ pip install .
```
### Install Transformers
Given Hugging Face hasn't officially supported the LLaMA models, We fork a branch of Transformers that can be compatible with our code
```shell
git clone https://github.com/hpcaitech/transformers
cd transformers
pip install .
```bash
pip install transformers==4.30.2
```
## How to use?
### Supervised datasets collection
we collected 104K bilingual datasets of Chinese and English, and you can find the datasets in this repo
[InstructionWild](https://github.com/XueFuzhao/InstructionWild)
We collected a 104K bilingual (Chinese and English) dataset; you can find it in the
[InstructionWild](https://github.com/XueFuzhao/InstructionWild) repo and in this [file](https://github.com/XueFuzhao/InstructionWild/blob/main/data/README.md).
Here is how we collected the data:
<p align="center">
<img src="https://raw.githubusercontent.com/hpcaitech/public_assets/main/applications/chat/data-collect.png" width=500/>
</p>
......@@ -112,12 +125,28 @@ Here is how we collected the data
Stage1 is supervised instruction fine-tuning, which uses the datasets mentioned earlier to fine-tune the model.
You can run `examples/train_sft.sh` to start supervised instruction fine-tuning.
[[Stage1 tutorial video]](https://www.youtube.com/watch?v=-qFBZFmOJfg)
**Note**: the supervised dataset uses the following format (a short validation sketch follows the example):
```json
[
{
"instruction": "Provide a list of the top 10 most popular mobile games in Asia",
"input": "",
"output": "The top 10 most popular mobile games in Asia are:\n1) PUBG Mobile\n2) Pokemon Go\n3) Candy Crush Saga\n4) Free Fire\n5) Clash of Clans\n6) Mario Kart Tour\n7) Arena of Valor\n8) Fantasy Westward Journey\n9) Subway Surfers\n10) ARK Survival Evolved",
"id": 0
},
...
]
```
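As a quick sanity check, the minimal sketch below verifies that a dataset file matches this format; the `data.json` path and the required keys are assumptions taken from the example above rather than a helper shipped with the repo.

```python
import json

# Load a candidate SFT dataset and check the fields used in the example above.
with open("data.json") as f:  # path is a placeholder
    records = json.load(f)

required = {"instruction", "input", "output", "id"}
for i, record in enumerate(records):
    missing = required - record.keys()
    assert not missing, f"record {i} is missing fields: {missing}"
print(f"{len(records)} records match the expected SFT format")
```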
### RLHF Training Stage2 - Training reward model
Stage2 trains a reward model: human annotators rank different outputs for the same prompt, and the resulting scores supervise the training of the reward model.
You can run `examples/train_rm.sh` to start reward model training.
[[Stage2 tutorial video]](https://www.youtube.com/watch?v=gMx2CApKhuo)
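Conceptually, the ranking supervision above amounts to a pairwise objective: the reward assigned to the preferred response should exceed that of the rejected one. The sketch below is illustrative only and may differ from the exact loss used by `train_rm.sh`.

```python
import torch
import torch.nn.functional as F

def pairwise_ranking_loss(chosen_reward: torch.Tensor, rejected_reward: torch.Tensor) -> torch.Tensor:
    # Push the reward of the human-preferred response above the rejected one.
    return -F.logsigmoid(chosen_reward - rejected_reward).mean()

# dummy scalar rewards for a batch of two preference pairs
loss = pairwise_ranking_loss(torch.tensor([1.2, 0.3]), torch.tensor([0.4, 0.5]))
```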
### RLHF Training Stage3 - Training model with reinforcement learning by human feedback
......@@ -128,6 +157,39 @@ Stage3 uses reinforcement learning algorithm, which is the most complex part of
</p>
You can run `examples/train_prompts.sh` to start PPO training with human feedback.
[[Stage3 tutorial video]](https://www.youtube.com/watch?v=Z8wwSHxPL9g)
**Note**: the required datasets use the following formats:
- `pretrain dataset`
```json
[
{
"instruction": "Provide a list of the top 10 most popular mobile games in Asia",
"input": "",
"output": "The top 10 most popular mobile games in Asia are:\n1) PUBG Mobile\n2) Pokemon Go\n3) Candy Crush Saga\n4) Free Fire\n5) Clash of Clans\n6) Mario Kart Tour\n7) Arena of Valor\n8) Fantasy Westward Journey\n9) Subway Surfers\n10) ARK Survival Evolved",
"id": 0
},
...
]
```
- `prompt dataset`
```json
[
{
"instruction": "Edit this paragraph to make it more concise: \"Yesterday, I went to the store and bought some things. Then, I came home and put them away. After that, I went for a walk and met some friends.\"",
"id": 0
},
{
"instruction": "Write a descriptive paragraph about a memorable vacation you went on",
"id": 1
},
...
]
```
For more details, see [`examples/`](https://github.com/hpcaitech/ColossalAI/tree/main/applications/Chat/examples).
......@@ -135,9 +197,9 @@ For more details, see [`examples/`](https://github.com/hpcaitech/ColossalAI/tree
We provide an online inference server and a benchmark. We aim to run inference on a single GPU, so quantization is essential when using large models.
We support 8-bit quantization (RTN), 4-bit quantization (GPTQ), and FP16 inference. You can
Online inference server scripts can help you deploy your own services.
We support 8-bit quantization (RTN), 4-bit quantization (GPTQ), and FP16 inference.
Online inference server scripts can help you deploy your own services.
For more details, see [`inference/`](https://github.com/hpcaitech/ColossalAI/tree/main/applications/Chat/inference).
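As an illustration, 8-bit weight loading with Hugging Face Transformers looks roughly like the sketch below; the checkpoint path is a placeholder, `load_in_8bit` additionally requires the `bitsandbytes` package, and the scripts in [`inference/`](https://github.com/hpcaitech/ColossalAI/tree/main/applications/Chat/inference) remain the authoritative reference.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "/path/to/Coati-7B"  # placeholder checkpoint path
tokenizer = AutoTokenizer.from_pretrained(model_path)
# 8-bit (RTN-style) weight quantization at load time; needs a CUDA GPU
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", load_in_8bit=True)

inputs = tokenizer("What is Colossal-AI?", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```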
## Coati7B examples
......@@ -147,6 +209,7 @@ For more details, see [`inference/`](https://github.com/hpcaitech/ColossalAI/tre
<details><summary><b>E-mail</b></summary>
![phd](https://raw.githubusercontent.com/hpcaitech/public_assets/main/applications/chat/Phd.png)
</details>
<details><summary><b>coding</b></summary>
......@@ -180,6 +243,7 @@ For more details, see [`inference/`](https://github.com/hpcaitech/ColossalAI/tre
</details>
### Open QA
<details><summary><b>Game</b></summary>
![Game](https://raw.githubusercontent.com/hpcaitech/public_assets/main/applications/chat/game.png)
......@@ -213,6 +277,7 @@ For more details, see [`inference/`](https://github.com/hpcaitech/ColossalAI/tre
You can find more examples in this [repo](https://github.com/XueFuzhao/InstructionWild/blob/main/comparison.md).
### Limitations
<details><summary><b>Limitations of LLaMA-finetuned models</b></summary>
- Both Alpaca and ColossalChat are based on LLaMA. It is hard to compensate for the missing knowledge in the pre-training stage.
- Lack of counting ability: Cannot count the number of items in a list.
......@@ -236,7 +301,7 @@ You can find more examples in this [repo](https://github.com/XueFuzhao/Instructi
We have integrated the Transformers save and load pipeline, allowing users to freely call Hugging Face's language models and save them in the HF format.
```python
from coati.models.llama import LlamaLM
from coati.trainer import SFTTrainer
......@@ -245,20 +310,20 @@ tokenizer = AutoTokenizer.from_pretrained(args.pretrain)
(model, optim) = strategy.prepare((model, optim))
trainer = SFTTrainer(model=model,
strategy=strategy,
optim=optim,
train_dataloader=train_dataloader,
eval_dataloader=eval_dataloader,
batch_size=args.batch_size,
max_epochs=args.max_epochs,
accumulation_steps=args.accumulation_steps
)
trainer.fit()
# this saves in pytorch format
strategy.save_model(model, args.save_path, only_rank0=True)
# this saves in HF format
strategy.save_pretrained(model, args.save_path, only_rank0=True, tokenizer=tokenizer)
```
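Since `save_pretrained` writes standard Hugging Face files (including the tokenizer, when one is passed as above), the resulting checkpoint should be loadable with vanilla `transformers`; the path below is a placeholder:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("/path/to/Coati-7B")
tokenizer = AutoTokenizer.from_pretrained("/path/to/Coati-7B")
```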
......@@ -269,12 +334,13 @@ strategy.save_pretrained(model, args.save_path, only_rank0=True, tokenizer=token
Here are some examples that allow you to train a 7B model on one or more consumer-grade GPUs.
If you only have a single 24 GB GPU, you can use the following script; `batch_size`, `lora_rank`, and `grad_checkpoint` are the most important parameters for training the model successfully (see the LoRA sketch after the script).
```bash
# [INFO]: MAX GPU MEMORY ALLOCATED: 19148.9345703125 MB
torchrun --standalone --nproc_per_node=1 train_sft.py \
--pretrain "/path/to/LLaMa-7B/" \
--model 'llama' \
--strategy ddp \
--log_interval 10 \
--save_path /path/to/Coati-7B \
--dataset /path/to/data.json \
--batch_size 1 \
......@@ -287,12 +353,12 @@ torchrun --standalone --nproc_per_node=1 train_sft.py \
```
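`lora_rank` matters because LoRA freezes the pretrained weights and learns only a low-rank update, which is what makes a 7B model fit on a 24 GB card. A generic sketch of the idea (not `coati`'s internal implementation):

```python
import torch
import torch.nn as nn


class LoRALinear(nn.Module):
    """Frozen base linear layer plus a trainable rank-r update:
    y = base(x) + x @ A^T @ B^T * (alpha / r). Only A and B get gradients,
    so a 4096x4096 layer trains ~33K parameters instead of ~16.8M at r=4."""

    def __init__(self, base: nn.Linear, r: int = 4, alpha: int = 32):
        super().__init__()
        self.base = base
        self.base.weight.requires_grad_(False)  # freeze the pretrained weight
        self.lora_a = nn.Parameter(torch.randn(r, base.in_features) * 0.01)
        self.lora_b = nn.Parameter(torch.zeros(base.out_features, r))  # zero init: no change at start
        self.scaling = alpha / r

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.base(x) + (x @ self.lora_a.T @ self.lora_b.T) * self.scaling
```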
The `colossalai_gemini` strategy enables a single 24 GB GPU to train the whole model without LoRA if you have sufficient CPU memory. You can use the following script.
```bash
torchrun --standalone --nproc_per_node=1 train_sft.py \
--pretrain "/path/to/LLaMa-7B/" \
--model 'llama' \
--strategy colossalai_gemini \
--log_interval 10 \
--save_path /path/to/Coati-7B \
--dataset /path/to/data.json \
--batch_size 1 \
......@@ -304,12 +370,12 @@ torchrun --standalone --nproc_per_node=1 train_sft.py \
```
If you have 4x32 GB GPUs, you can even train the whole 7B model using our `colossalai_zero2_cpu` strategy! The script is given as follows.
```bash
torchrun --standalone --nproc_per_node=4 train_sft.py \
--pretrain "/path/to/LLaMa-7B/" \
--model 'llama' \
--strategy colossalai_zero2_cpu \
--log_interval 10 \
--save_path /path/to/Coati-7B \
--dataset /path/to/data.json \
--batch_size 1 \
......@@ -319,8 +385,8 @@ torchrun --standalone --nproc_per_node=4 train_sft.py \
--max_epochs 1 \
--grad_checkpoint
```
</details>
## The Plan
......@@ -335,31 +401,33 @@ torchrun --standalone --nproc_per_node=4 train_sft.py \
- [ ] support chain-of-thought via [langchain](https://github.com/hwchase17/langchain)
### Real-time progress
You will find our progress on the GitHub [project board](https://github.com/orgs/hpcaitech/projects/17/views/1).
## Invitation to open-source contribution
Referring to the successful attempts of [BLOOM](https://bigscience.huggingface.co/) and [Stable Diffusion](https://en.wikipedia.org/wiki/Stable_Diffusion), all developers and partners with computing power, datasets, or models are welcome to join and build the Colossal-AI community, making efforts towards the era of big AI models from the starting point of replicating ChatGPT!
You may contact us or participate in the following ways:
1. [Leaving a Star ⭐](https://github.com/hpcaitech/ColossalAI/stargazers) to show your support. Thanks!
2. Posting an [issue](https://github.com/hpcaitech/ColossalAI/issues/new/choose), or submitting a PR on GitHub following the guidelines in [Contributing](https://github.com/hpcaitech/ColossalAI/blob/main/CONTRIBUTING.md).
3. Join the Colossal-AI community on
[Slack](https://github.com/hpcaitech/public_assets/tree/main/colossalai/contact/slack),
and [WeChat(微信)](https://raw.githubusercontent.com/hpcaitech/public_assets/main/colossalai/img/WeChat.png "qrcode") to share your ideas.
4. Send your official proposal to contact@hpcaitech.com
Thanks so much to all of our amazing contributors!
## Quick Preview
<div align="center">
<a href="https://chat.colossalai.org/">
<img src="https://raw.githubusercontent.com/hpcaitech/public_assets/main/colossalai/img/Chat-demo.png" width="700" />
</a>
</div>
- An open-source low-cost solution for cloning [ChatGPT](https://openai.com/blog/chatgpt/) with a complete RLHF pipeline. [[demo]](https://chat.colossalai.org)
<p id="ChatGPT_scaling" align="center">
<img src="https://raw.githubusercontent.com/hpcaitech/public_assets/main/applications/chatgpt/ChatGPT%20scaling.png" width=800/>
......@@ -386,18 +454,21 @@ Thanks so much to all of our amazing contributors!
| Better Cases | 38 ⚔ **41** | **45** ⚔ 33 |
| Win Rate | 48% ⚔ **52%** | **58%** ⚔ 42% |
| Average Score | 7.06 ⚔ **7.13** | **7.31** ⚔ 6.82 |
- Our Coati-7B model performs better than Alpaca-7B when using GPT-4 to evaluate model performance. The Coati-7B model we evaluated is an older version trained a few weeks ago; a new version is around the corner.
## Authors
Coati is developed by the ColossalAI Team:
- [Fazzie](https://fazzie-key.cool/about/index.html)
- [FrankLeeeee](https://github.com/FrankLeeeee)
- [BlueRum](https://github.com/ht-zhou)
- [ver217](https://github.com/ver217)
- [ofey404](https://github.com/ofey404)
- [Wenhao Chen](https://github.com/CWHer)
The PhD students from [(HPC-AI) Lab](https://ai.comp.nus.edu.sg/) also contributed a lot to this project.
- [Zangwei Zheng](https://github.com/zhengzangw)
- [Xue Fuzhao](https://github.com/XueFuzhao)
......
......@@ -27,9 +27,12 @@ We also provide various training strategies:
Currently we only support launching with `torchrun`. E.g.
```bash
# run OPT-125M with no lora (lora_rank=0) on single-node single-GPU with min batch size
torchrun --standalone --nproc_per_node 1 benchmark_opt_lora_dummy.py \
--model 125m --critic_model 125m --strategy ddp \
--experience_batch_size 1 --train_batch_size 1 --lora_rank 0
# run Actor (OPT-1.3B) and Critic (OPT-350M) with lora_rank=4 on single-node 4-GPU
torchrun --standalone --nproc_per_node 4 benchmark_opt_lora_dummy.py \
--model 1.3b --critic_model 350m --strategy colossalai_zero2 --lora_rank 4
```
......@@ -8,7 +8,7 @@ from coati.models.base import RewardModel
from coati.models.opt import OPTActor, OPTCritic
from coati.trainer import PPOTrainer
from coati.trainer.callbacks import PerformanceEvaluator
from coati.trainer.strategies import DDPStrategy, GeminiStrategy, LowLevelZeroStrategy, Strategy
from torch.optim import Adam
from torch.utils.data import DataLoader
from transformers import AutoTokenizer
......@@ -19,7 +19,7 @@ from colossalai.nn.optimizer import HybridAdam
def get_model_numel(model: nn.Module, strategy: Strategy) -> int:
numel = sum(p.numel() for p in model.parameters())
if isinstance(strategy, GeminiStrategy) and strategy.shard_init:
numel *= dist.get_world_size()
return numel
......@@ -27,7 +27,7 @@ def get_model_numel(model: nn.Module, strategy: Strategy) -> int:
def preprocess_batch(samples) -> dict:
input_ids = torch.stack(samples)
attention_mask = torch.ones_like(input_ids, dtype=torch.long)
return {"input_ids": input_ids, "attention_mask": attention_mask}
def print_rank_0(*args, **kwargs) -> None:
......@@ -39,32 +39,32 @@ def print_model_numel(model_dict: dict) -> None:
    B = 1024**3
    M = 1024**2
    K = 1024
    outputs = ""
    for name, numel in model_dict.items():
        outputs += f"{name}: "
        if numel >= B:
            outputs += f"{numel / B:.2f} B\n"
        elif numel >= M:
            outputs += f"{numel / M:.2f} M\n"
        elif numel >= K:
            outputs += f"{numel / K:.2f} K\n"
        else:
            outputs += f"{numel}\n"
    print_rank_0(outputs)
def get_gpt_config(model_name: str) -> OPTConfig:
model_map = {
"125m": OPTConfig.from_pretrained("facebook/opt-125m"),
"350m": OPTConfig(hidden_size=1024, ffn_dim=4096, num_hidden_layers=24, num_attention_heads=16),
"700m": OPTConfig(hidden_size=1280, ffn_dim=5120, num_hidden_layers=36, num_attention_heads=20),
"1.3b": OPTConfig.from_pretrained("facebook/opt-1.3b"),
"2.7b": OPTConfig.from_pretrained("facebook/opt-2.7b"),
"3.5b": OPTConfig(hidden_size=3072, ffn_dim=12288, num_hidden_layers=32, num_attention_heads=32),
"5.5b": OPTConfig(hidden_size=3840, ffn_dim=15360, num_hidden_layers=32, num_attention_heads=32),
"6.7b": OPTConfig.from_pretrained("facebook/opt-6.7b"),
"10b": OPTConfig(hidden_size=5120, ffn_dim=20480, num_hidden_layers=32, num_attention_heads=32),
"13b": OPTConfig.from_pretrained("facebook/opt-13b"),
}
try:
return model_map[model_name]
......@@ -73,20 +73,20 @@ def get_gpt_config(model_name: str) -> OPTConfig:
def main(args):
    if args.strategy == "ddp":
        strategy = DDPStrategy()
    elif args.strategy == "colossalai_gemini":
        strategy = GeminiStrategy(placement_policy="static", initial_scale=2**5)
    elif args.strategy == "colossalai_gemini_cpu":
        strategy = GeminiStrategy(
            placement_policy="static", offload_optim_frac=1.0, offload_param_frac=1.0, initial_scale=2**5
        )
    elif args.strategy == "colossalai_zero2":
        strategy = LowLevelZeroStrategy(stage=2, placement_policy="cuda")
    elif args.strategy == "colossalai_zero2_cpu":
        strategy = LowLevelZeroStrategy(stage=2, placement_policy="cpu")
    elif args.strategy == "colossalai_zero1":
        strategy = LowLevelZeroStrategy(stage=1, placement_policy="cuda")
    elif args.strategy == "colossalai_zero1_cpu":
        strategy = LowLevelZeroStrategy(stage=1, placement_policy="cpu")
    else:
        raise ValueError(f'Unsupported strategy "{args.strategy}"')
......@@ -103,92 +103,106 @@ def main(args):
if args.use_kernels:
from coati.kernels import convert_to_xformer_model
actor, critic, initial_model, reward_model = map(
convert_to_xformer_model, (actor, critic, initial_model, reward_model)
)
actor_numel = get_model_numel(actor, strategy)
critic_numel = get_model_numel(critic, strategy)
initial_model_numel = get_model_numel(initial_model, strategy)
reward_model_numel = get_model_numel(reward_model, strategy)
print_model_numel(
{
"Actor": actor_numel,
"Critic": critic_numel,
"Initial model": initial_model_numel,
"Reward model": reward_model_numel,
}
)
performance_evaluator = PerformanceEvaluator(
actor_numel,
critic_numel,
initial_model_numel,
reward_model_numel,
enable_grad_checkpoint=False,
ignore_episodes=1,
)
if args.strategy.startswith("colossalai"):
actor_optim = HybridAdam(actor.parameters(), lr=5e-6)
critic_optim = HybridAdam(critic.parameters(), lr=5e-6)
else:
actor_optim = Adam(actor.parameters(), lr=5e-6)
critic_optim = Adam(critic.parameters(), lr=5e-6)
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"
(actor, actor_optim), (critic, critic_optim) = strategy.prepare((actor, actor_optim), (critic, critic_optim))
    random_prompts = torch.randint(tokenizer.vocab_size, (1000, 256), device=torch.cuda.current_device())
dataloader = DataLoader(
random_prompts, batch_size=args.experience_batch_size, shuffle=True, collate_fn=preprocess_batch
)
trainer = PPOTrainer(
strategy,
actor,
critic,
reward_model,
initial_model,
actor_optim,
critic_optim,
tokenizer=tokenizer,
ptx_coef=0,
train_batch_size=args.train_batch_size,
offload_inference_models=args.offload_inference_models,
max_length=512,
do_sample=True,
temperature=1.0,
top_k=50,
use_cache=True,
callbacks=[performance_evaluator],
)
trainer.fit(
prompt_dataloader=dataloader,
pretrain_dataloader=None,
num_episodes=args.num_episodes,
num_update_steps=args.num_update_steps,
num_collect_steps=args.num_collect_steps,
)
print_rank_0(f"Peak CUDA mem: {torch.cuda.max_memory_allocated()/1024**3:.2f} GB")
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--model", default="125m")
parser.add_argument("--critic_model", default="125m")
parser.add_argument(
"--strategy",
choices=[
"ddp",
"colossalai_gemini",
"colossalai_gemini_cpu",
"colossalai_zero2",
"colossalai_zero2_cpu",
"colossalai_zero1",
"colossalai_zero1_cpu",
],
default="ddp",
)
parser.add_argument("--num_episodes", type=int, default=3)
parser.add_argument("--num_collect_steps", type=int, default=8)
parser.add_argument("--num_update_steps", type=int, default=1)
parser.add_argument("--train_batch_size", type=int, default=8)
parser.add_argument("--experience_batch_size", type=int, default=8)
parser.add_argument("--lora_rank", type=int, default=0)
parser.add_argument("--cuda_mem_frac", type=float, default=1.0)
parser.add_argument("--offload_inference_models", action="store_true", default=False)
parser.add_argument("--use_kernels", action="store_true", default=False)
args = parser.parse_args()
main(args)