# Commit 7bc5a8e3 authored by zhuwenwen
# Browse files
# parents e6748d82 0f785cb1
name: Test Documentation on PR
on:
pull_request:
# any change in the examples folder will trigger check for the corresponding example.
paths:
- 'docs/source/**.md'
jobs:
# This is for changed example files detect and output a matrix containing all the corresponding directory name.
detect-changed-doc:
if: |
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' && github.event_name == 'pull_request'
runs-on: ubuntu-latest
outputs:
any_changed: ${{ steps.changed-files.outputs.any_changed }}
changed_files: ${{ steps.changed-files.outputs.all_changed_files }}
name: Detect changed example files
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
ref: ${{ github.event.pull_request.head.sha }}
- name: Locate base commit
id: locate-base-sha
run: |
curBranch=$(git rev-parse --abbrev-ref HEAD)
commonCommit=$(git merge-base origin/main $curBranch)
echo $commonCommit
echo "baseSHA=$commonCommit" >> $GITHUB_OUTPUT
- name: Get all changed example files
id: changed-files
uses: tj-actions/changed-files@v35
with:
base_sha: ${{ steps.locate-base-sha.outputs.baseSHA }}
files: |
./docs/source/**/*.md
# If no file is changed, it will prompt an error and shows the matrix do not have value.
check-changed-doc:
  # Add this condition to avoid executing this job if the trigger event is workflow_dispatch.
  if: |
    github.event.pull_request.draft == false &&
    github.base_ref == 'main' &&
    github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' && github.event_name == 'pull_request' &&
    needs.detect-changed-doc.outputs.any_changed == 'true'
  name: Test the changed Doc
  needs: detect-changed-doc
  runs-on: [self-hosted, gpu]
  container:
    image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
    options: --gpus all --rm
  timeout-minutes: 20
  defaults:
    run:
      shell: bash
  steps:
    - name: Checkout ColossalAI-Documentation
      uses: actions/checkout@v2
      with:
        path: './ColossalAI-Documentation'
        repository: 'hpcaitech/ColossalAI-Documentation'
    - name: Install Docer
      run: |
        pip install -v ./ColossalAI-Documentation/doc-build/third_party/hf-doc-builder
        pip install -v ./ColossalAI-Documentation/doc-build
    - name: Checkout ColossalAI
      uses: actions/checkout@v3
    - name: Install Doc Test Requirements
      run: |
        source activate pytorch
        conda env update --file docs/conda-doc-test-deps.yml --prune
        pip install -r docs/requirements-doc-test.txt
    - name: Install ColossalAI
      run: |
        source activate pytorch
        pip install -v .
    - name: Test the Doc
      run: |
        source activate pytorch
        # steps from another job are not addressable via the 'steps' context;
        # the changed-file list must come through the detect-changed-doc job's
        # declared 'changed_files' output (the original expanded to an empty
        # string, so this loop silently never ran).
        for file in ${{ needs.detect-changed-doc.outputs.changed_files }}; do
          echo "Testing $file now..."
          docer test -p $file
        done
      env:
        NCCL_SHM_DISABLE: 1
name: Test Documentation on Schedule
on:
# run at 07:00 of every Sunday(singapore time) so here is UTC time Saturday 23:00
schedule:
- cron: '0 23 * * 6'
workflow_dispatch:
jobs:
check-changed-doc:
# Add this condition to avoid executing this job if the trigger event is workflow_dispatch.
if: github.repository == 'hpcaitech/ColossalAI'
name: Test the changed Doc
runs-on: [self-hosted, gpu]
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
options: --gpus all --rm
timeout-minutes: 60
steps:
- name: Checkout ColossalAI-Documentation
uses: actions/checkout@v2
with:
path: './ColossalAI-Documentation'
repository: 'hpcaitech/ColossalAI-Documentation'
- name: Install Docer
run: |
pip install -v ./ColossalAI-Documentation/doc-build/third_party/hf-doc-builder
pip install -v ./ColossalAI-Documentation/doc-build
- name: Checkout ColossalAI
uses: actions/checkout@v3
- name: Install ColossalAI
run: |
pip install -v .
- name: Install Doc Test Requirements
run: |
pip install -r docs/requirements-doc-test.txt
- name: Test the Doc
run: |
for file in $(find ./docs/source -name "*.md"); do
docer test -p $file
done
env:
NCCL_SHM_DISABLE: 1
name: Draft GitHub Release Post
on:
workflow_dispatch:
pull_request:
paths:
- 'version.txt'
types:
- closed
jobs:
release:
  name: Draft Release Post
  if: ( github.event_name == 'workflow_dispatch' || github.event.pull_request.merged == true ) && github.repository == 'hpcaitech/ColossalAI'
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v2
      with:
        fetch-depth: 0
    - uses: actions/setup-python@v2
      with:
        python-version: '3.8.14'
    - name: generate draft
      id: generate_draft
      run: |
        version=v$(cat version.txt)
        pip install requests
        python ./.github/workflows/scripts/generate_release_draft.py --out $PWD/release_draft.md --version $version
        # '::set-output' is deprecated and disabled on current runners;
        # write step outputs through $GITHUB_OUTPUT instead (matches the
        # convention used by the other workflows in this repo).
        echo "version=$version" >> $GITHUB_OUTPUT
        echo "path=$PWD/release_draft.md" >> $GITHUB_OUTPUT
      env:
        GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Create Release
      id: create_release
      uses: actions/create-release@v1
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      with:
        tag_name: ${{ steps.generate_draft.outputs.version }}
        release_name: Version ${{ steps.generate_draft.outputs.version }} Release Today!
        body_path: ${{ steps.generate_draft.outputs.path }}
        # lowercase boolean: 'True' is a YAML 1.1 spelling rejected by strict parsers
        draft: true
        prerelease: false
name: Test Example on Dispatch
on:
workflow_dispatch:
inputs:
example_directory:
type: string
description: example directory, separated by space. For example, language/gpt, images/vit. Simply input language or simply gpt does not work.
required: true
jobs:
matrix_preparation:
  # This workflow only triggers on workflow_dispatch, so the pull_request
  # context is never populated; gating on pull_request fields made this job
  # unrunnable (null == false evaluates to false). Keep only the repo guard.
  if: github.repository == 'hpcaitech/ColossalAI'
  name: Check the examples user want
  runs-on: ubuntu-latest
  outputs:
    matrix: ${{ steps.set-matrix.outputs.matrix }}
  steps:
    - name: 📚 Checkout
      uses: actions/checkout@v3
    - name: Set up matrix
      id: set-matrix
      env:
        check_dir: ${{ inputs.example_directory }}
      run: |
        res=`python .github/workflows/scripts/example_checks/check_dispatch_inputs.py --fileNameList $check_dir`
        # compare the variable's value ("$res"), not the literal string 'res';
        # the original test could never be true, so bad input was not rejected
        if [ "$res" == "failure" ]; then
          exit 1
        fi
        dirs="[${check_dir}]"
        echo "Testing examples in $dirs"
        echo "matrix={\"directory\":$(echo "$dirs")}" >> $GITHUB_OUTPUT
test_example:
  # workflow_dispatch-only workflow: pull_request context is never available,
  # so only the repository guard is meaningful here.
  if: github.repository == 'hpcaitech/ColossalAI'
  name: Manually check example files
  # must reference the job id actually declared in this workflow
  # ('matrix_preparation'); 'manual_check_matrix_preparation' does not exist,
  # which made the whole workflow file invalid.
  needs: matrix_preparation
  runs-on: [self-hosted, gpu]
  strategy:
    fail-fast: false
    matrix: ${{ fromJson(needs.matrix_preparation.outputs.matrix) }}
  container:
    image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
    options: --gpus all --rm -v /data/scratch/examples-data:/data/
  timeout-minutes: 10
  steps:
    - name: 📚 Checkout
      uses: actions/checkout@v3
    - name: Install Colossal-AI
      run: |
        pip install -v .
    - name: Test the example
      run: |
        dir=${{ matrix.directory }}
        echo "Testing ${dir} now"
        cd "${PWD}/examples/${dir}"
        bash test_ci.sh
      env:
        NCCL_SHM_DISABLE: 1
name: Test Example on PR
on:
pull_request:
# any change in the examples folder will trigger check for the corresponding example.
paths:
- 'examples/**'
jobs:
# This is for changed example files detect and output a matrix containing all the corresponding directory name.
detect-changed-example:
if: |
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' && github.event_name == 'pull_request'
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.setup-matrix.outputs.matrix }}
anyChanged: ${{ steps.setup-matrix.outputs.anyChanged }}
name: Detect changed example files
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
ref: ${{ github.event.pull_request.head.sha }}
- name: Locate base commit
id: locate-base-sha
run: |
curBranch=$(git rev-parse --abbrev-ref HEAD)
commonCommit=$(git merge-base origin/main $curBranch)
echo $commonCommit
echo "baseSHA=$commonCommit" >> $GITHUB_OUTPUT
- name: Get all changed example files
id: changed-files
uses: tj-actions/changed-files@v35
with:
base_sha: ${{ steps.locate-base-sha.outputs.baseSHA }}
- name: setup matrix
id: setup-matrix
run: |
changedFileName=""
for file in ${{ steps.changed-files.outputs.all_changed_files }}; do
changedFileName="${file}:${changedFileName}"
done
echo "$changedFileName was changed"
res=`python .github/workflows/scripts/example_checks/detect_changed_example.py --fileNameList $changedFileName`
echo "All changed examples are $res"
if [ "$res" == "[]" ]; then
echo "anyChanged=false" >> $GITHUB_OUTPUT
echo "matrix=null" >> $GITHUB_OUTPUT
else
dirs=$( IFS=',' ; echo "${res[*]}" )
echo "anyChanged=true" >> $GITHUB_OUTPUT
echo "matrix={\"directory\":$(echo "$dirs")}" >> $GITHUB_OUTPUT
fi
# If no file is changed, it will prompt an error and shows the matrix do not have value.
check-changed-example:
# Add this condition to avoid executing this job if the trigger event is workflow_dispatch.
if: |
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' && github.event_name == 'pull_request' &&
needs.detect-changed-example.outputs.anyChanged == 'true'
name: Test the changed example
needs: detect-changed-example
runs-on: [self-hosted, gpu]
strategy:
fail-fast: false
matrix: ${{fromJson(needs.detect-changed-example.outputs.matrix)}}
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
options: --gpus all --rm -v /data/scratch/examples-data:/data/
timeout-minutes: 10
steps:
- uses: actions/checkout@v3
- name: Install Colossal-AI
run: |
pip install -v .
- name: Test the example
run: |
example_dir=${{ matrix.directory }}
cd "${PWD}/examples/${example_dir}"
bash test_ci.sh
env:
NCCL_SHM_DISABLE: 1
name: Test Example on Schedule
on:
# run at 00:00 of every Sunday(singapore time) so here is UTC time Saturday 16:00
schedule:
- cron: '0 16 * * 6'
workflow_dispatch:
jobs:
# This is for all files' weekly check. Specifically, this job is to find all the directories.
matrix_preparation:
if: github.repository == 'hpcaitech/ColossalAI'
name: Prepare matrix for weekly check
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.setup-matrix.outputs.matrix }}
steps:
- name: 📚 Checkout
uses: actions/checkout@v3
- name: setup matrix
id: setup-matrix
run: |
res=`python .github/workflows/scripts/example_checks/check_example_weekly.py`
all_loc=$( IFS=',' ; echo "${res[*]}" )
echo "Found the examples: $all_loc"
echo "matrix={\"directory\":$(echo "$all_loc")}" >> $GITHUB_OUTPUT
weekly_check:
if: github.repository == 'hpcaitech/ColossalAI'
name: Weekly check all examples
needs: matrix_preparation
runs-on: [self-hosted, gpu]
strategy:
fail-fast: false
matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}}
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
timeout-minutes: 10
steps:
- name: 📚 Checkout
uses: actions/checkout@v3
- name: Install Colossal-AI
run: |
pip install -v .
- name: Traverse all files
run: |
example_dir=${{ matrix.directory }}
echo "Testing ${example_dir} now"
cd "${PWD}/examples/${example_dir}"
bash test_ci.sh
env:
NCCL_SHM_DISABLE: 1
- name: Notify Lark
id: message-preparation
if: ${{ failure() }}
run: |
url=$SERVER_URL/$REPO/actions/runs/$RUN_ID
msg="Example tests failed for $EXAMPLE_DIR, please visit $url for details"
echo $msg
python .github/workflows/scripts/send_message_to_lark.py -m "$msg" -u $WEBHOOK_URL
env:
SERVER_URL: ${{github.server_url }}
REPO: ${{ github.repository }}
RUN_ID: ${{ github.run_id }}
WEBHOOK_URL: ${{ secrets.LARK_NOTIFICATION_WEBHOOK_URL }}
EXAMPLE_DIR: ${{ matrix.directory }}
name: post-commit
on:
pull_request:
types:
- closed
jobs:
# this job will run after a PR is merged to run pre-commit on any changed file
# so that the user does not need to learn pre-commit and pre-commit can still
# be auto-executed by the workflow
pre-commit:
runs-on: ubuntu-latest
if: github.event.pull_request.merged == true && github.repository == 'hpcaitech/ColossalAI'
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0
ref: ${{ github.event.pull_request.head.sha }}
# the PR branch and the hpcaitech/colossal-ai main branch
# must share a common commit, we need to locate that commit,
# which is the commit checked-out or forked when the PR branch is created
# such that we can look for files changed since that commit
- name: Locate base commit
id: locate-base-sha
run: |
curBranch=$(git rev-parse --abbrev-ref HEAD)
commonCommit=$(git merge-base origin/main $curBranch)
echo $commonCommit
echo "baseSHA=$commonCommit" >> $GITHUB_OUTPUT
- name: Find the changed files
id: find-changed-files
uses: tj-actions/changed-files@v35
with:
base_sha: ${{ steps.locate-base-sha.outputs.baseSHA }}
- name: List all changed files
run: |
for file in ${{ steps.find-changed-files.outputs.all_changed_files }}; do
echo "$file was changed"
done
# check out the main branch
- uses: actions/checkout@v2
with:
ref: 'main'
- uses: actions/setup-python@v3
- name: Cache pre-commit hooks
uses: actions/cache@v3
with:
path: ~/.cache/pre-commit
key: ${{ runner.os }}-pre-commit-hooks
- name: Set up pre-commit
run: |
pip install pre-commit
pre-commit install
# run pre-commit on changed files
- name: Run Pre-commit
run: |
for file in ${{ steps.find-changed-files.outputs.all_changed_files }}; do
pre-commit run --files $file || true
done
# create commit for pre-commit
# when all files are well formatted, there is no need to create a commit
# therefore, this step will produce an error, which should be allowed
- name: Create commits
id: commit
continue-on-error: true
run: |
git config --global user.name 'github-actions'
git config --global user.email 'github-actions@github.com'
git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}
git add -A
git commit -am "[format] applied code formatting on changed files in pull request ${{ github.event.pull_request.number }}"
# create pull request
- name: Create Pull Request
if: steps.commit.outcome == 'success'
id: cpr
uses: peter-evans/create-pull-request@v4
with:
branch: pre-commit-${{ github.event.pull_request.number }}
title: "[format] applied code formatting on changed files in PR ${{ github.event.pull_request.number }}"
- name: Enable Auto-merge for the New PR
if: steps.commit.outcome == 'success'
uses: peter-evans/enable-pull-request-automerge@v2
with:
pull-request-number: ${{ steps.cpr.outputs.pull-request-number }}
merge-method: squash
name: Publish Docker Image to DockerHub after Merge
on:
workflow_dispatch:
pull_request:
paths:
- 'version.txt'
types:
- closed
jobs:
release:
name: Publish Docker Image to DockerHub
if: ( github.event_name == 'workflow_dispatch' || github.event.pull_request.merged == true ) && github.repository == 'hpcaitech/ColossalAI'
runs-on: [self-hosted, gpu]
container:
image: "hpcaitech/docker-in-docker:latest"
options: --gpus all --rm -v /var/run/docker.sock:/var/run/docker.sock
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Build Docker
id: build
run: |
version=$(cat version.txt)
tag=hpcaitech/colossalai:$version
docker build --build-arg http_proxy=http://172.17.0.1:7890 --build-arg https_proxy=http://172.17.0.1:7890 -t $tag ./docker
echo "tag=${tag}" >> $GITHUB_OUTPUT
- name: Log in to Docker Hub
uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Push Docker image
id: docker-push
run: |
docker push ${{ steps.build.outputs.tag }}
notify:
name: Notify Lark via webhook
needs: release
runs-on: ubuntu-latest
if: ${{ always() }}
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: '3.8.14'
- name: Install requests
run: pip install requests
- name: Notify Lark
id: message-preparation
run: |
url=$SERVER_URL/$REPO/actions/runs/$RUN_ID
if [ "$STATUS" == 'success' ]
then
msg="The Docker image for the latest release has been successfully built and pushed to DockerHub."
else
msg="Failed to build and push the Docker image for the latest release, please visit $url for details."
fi
echo $msg
python .github/workflows/scripts/send_message_to_lark.py -m "$msg" -u $WEBHOOK_URL
env:
SERVER_URL: ${{github.server_url }}
REPO: ${{ github.repository }}
RUN_ID: ${{ github.run_id }}
WEBHOOK_URL: ${{ secrets.LARK_NOTIFICATION_WEBHOOK_URL }}
STATUS: ${{ needs.release.result }}
name: Publish Nightly Version to PyPI
on:
workflow_dispatch:
schedule:
- cron: '0 0 * * 6' # release on every Saturday 00:00 UTC time (cron day 6 = Saturday)
jobs:
build-n-publish:
if: github.repository == 'hpcaitech/ColossalAI'
name: Build and publish Python 🐍 distributions 📦 to PyPI
runs-on: ubuntu-latest
timeout-minutes: 20
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: '3.8.14'
- run: NIGHTLY=1 python setup.py sdist build
# publish to PyPI if executed on the main branch
- name: Publish package to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
id: publish
with:
user: __token__
password: ${{ secrets.PYPI_API_TOKEN }}
verbose: true
notify:
  name: Notify Lark via webhook
  needs: build-n-publish
  runs-on: ubuntu-latest
  # keep the whole condition inside ONE expression: the original
  # '${{ always() }} && github.repository == ...' rendered to the literal
  # string 'true && github.repository == ...', which is always truthy,
  # so the repository guard was never applied.
  if: always() && github.repository == 'hpcaitech/ColossalAI'
  steps:
    - uses: actions/checkout@v2
    - uses: actions/setup-python@v2
      with:
        python-version: '3.8.14'
    - name: Install requests
      run: pip install requests
    - name: Notify Lark
      id: message-preparation
      run: |
        url=$SERVER_URL/$REPO/actions/runs/$RUN_ID
        # quote $STATUS so an empty value cannot break the test syntax
        if [ "$STATUS" == 'success' ]
        then
          msg="The Colossal-AI nightly version has been successfully released to PyPI."
        else
          msg="Failed to release Colossal-AI nightly version to PyPI, please visit $url for details."
        fi
        echo $msg
        python .github/workflows/scripts/send_message_to_lark.py -m "$msg" -u $WEBHOOK_URL
      env:
        SERVER_URL: ${{ github.server_url }}
        REPO: ${{ github.repository }}
        RUN_ID: ${{ github.run_id }}
        WEBHOOK_URL: ${{ secrets.LARK_NOTIFICATION_WEBHOOK_URL }}
        # 'steps.publish' lives in the build-n-publish job and is not visible
        # here; use the job-level result, as the sibling notify jobs do.
        STATUS: ${{ needs.build-n-publish.result }}
name: Publish to PyPI
on:
workflow_dispatch:
pull_request:
paths:
- 'version.txt'
types:
- closed
jobs:
build-n-publish:
  # parenthesize so the repository check applies to manual dispatches too:
  # '&&' binds tighter than '||', so the original condition let a
  # workflow_dispatch on any fork reach the publish step.
  if: (github.event_name == 'workflow_dispatch' || (github.event.pull_request.merged == true && github.base_ref == 'main')) && github.repository == 'hpcaitech/ColossalAI'
  name: Build and publish Python 🐍 distributions 📦 to PyPI
  runs-on: ubuntu-latest
  timeout-minutes: 20
  steps:
    - uses: actions/checkout@v2
    - uses: actions/setup-python@v2
      with:
        python-version: '3.8.14'
    - run: python setup.py sdist build
    # publish to PyPI if executed on the main branch
    - name: Publish package to PyPI
      id: publish
      uses: pypa/gh-action-pypi-publish@release/v1
      with:
        user: __token__
        password: ${{ secrets.PYPI_API_TOKEN }}
        verbose: true
notify:
name: Notify Lark via webhook
needs: build-n-publish
runs-on: ubuntu-latest
if: ${{ always() }}
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: '3.8.14'
- name: Install requests
run: pip install requests
- name: Notify Lark
id: message-preparation
run: |
url=$SERVER_URL/$REPO/actions/runs/$RUN_ID
if [ "$STATUS" == 'success' ]
then
msg="The Colossal-AI latest version has been successfully released to PyPI."
else
msg="Failed to release Colossal-AI to PyPI, please visit $url for details."
fi
echo $msg
python .github/workflows/scripts/send_message_to_lark.py -m "$msg" -u $WEBHOOK_URL
env:
SERVER_URL: ${{github.server_url }}
REPO: ${{ github.repository }}
RUN_ID: ${{ github.run_id }}
WEBHOOK_URL: ${{ secrets.LARK_NOTIFICATION_WEBHOOK_URL }}
STATUS: ${{ needs.build-n-publish.result }}
name: Publish to Test-PyPI Before Merge
on:
pull_request:
paths:
- 'version.txt'
jobs:
build-n-publish:
if: github.event_name == 'workflow_dispatch' || github.repository == 'hpcaitech/ColossalAI'
name: Build and publish Python 🐍 distributions 📦 to Test PyPI
runs-on: ubuntu-latest
timeout-minutes: 20
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: '3.8.14'
- name: add timestamp to the version
id: prep-version
run: |
version=$(cat version.txt)
timestamp=$(date +%s)
new_version="${version}.post${timestamp}"
echo $new_version > ./version.txt
echo "version=$new_version" >> $GITHUB_OUTPUT
- run: python setup.py sdist build
# publish to PyPI if executed on the main branch
- name: Publish package to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
user: __token__
password: ${{ secrets.TEST_PYPI_API_TOKEN }}
repository_url: https://test.pypi.org/legacy/
verbose: true
- name: Wait for Test-PyPI refresh
run: sleep 300s
shell: bash
- name: Try installation
run: |
# we need to install the requirements.txt first
# as test-pypi may not contain the distributions for libs listed in the txt file
pip install -r requirements/requirements.txt
pip install --index-url https://test.pypi.org/simple/ colossalai==$VERSION
env:
VERSION: ${{ steps.prep-version.outputs.version }}
name: Generate Community Report and Send to Lark
on:
workflow_dispatch:
schedule:
# release on every Friday 09:00 UTC time, 17:00 Beijing/Singapore time
- cron: '0 9 * * 5'
jobs:
generate-and-publish:
if: github.repository == 'hpcaitech/ColossalAI'
name: Generate leaderboard report and publish to Lark
runs-on: ubuntu-latest
timeout-minutes: 20
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: '3.8.14'
- run: pip install requests matplotlib seaborn requests_toolbelt pytz
- run: python .github/workflows/scripts/generate_leaderboard_and_send_to_lark.py
env:
LARK_APP_ID: ${{ secrets.LARK_LEADERBOARD_APP_ID }}
LARK_APP_SECRET: ${{ secrets.LARK_LEADERBOARD_APP_SECRET }}
LARK_WEBHOOK_URL: ${{ secrets.LARK_NOTIFICATION_WEBHOOK_URL }}
GITHUB_TOKEN: ${{ github.token }}
name: Report Test Coverage
on:
workflow_run:
workflows: [Build on PR]
types:
- completed
jobs:
report-test-coverage:
runs-on: ubuntu-latest
steps:
- name: 'Download artifact'
uses: actions/github-script@v6
with:
script: |
let allArtifacts = await github.rest.actions.listWorkflowRunArtifacts({
owner: context.repo.owner,
repo: context.repo.repo,
run_id: context.payload.workflow_run.id,
});
let matchArtifact = allArtifacts.data.artifacts.filter((artifact) => {
return artifact.name == "report"
})[0];
let download = await github.rest.actions.downloadArtifact({
owner: context.repo.owner,
repo: context.repo.repo,
artifact_id: matchArtifact.id,
archive_format: 'zip',
});
let fs = require('fs');
fs.writeFileSync(`${process.env.GITHUB_WORKSPACE}/report.zip`, Buffer.from(download.data));
- name: 'Unzip artifact'
id: unzip
run: |
unzip report.zip
if [ -f "coverage.txt" ]; then
echo "hasReport=true" >> $GITHUB_OUTPUT
else
echo "hasReport=false" >> $GITHUB_OUTPUT
fi
- name: Make Coverage Report Collapsable
if: steps.unzip.outputs.hasReport == 'true'
run: |
covNum=$(cat cov_number)
title="The code coverage for the changed files is ${covNum}%."
touch coverage_report.txt
echo $title >> coverage_report.txt
echo " " >> coverage_report.txt
echo "<details>" >> coverage_report.txt
echo "<summary>Click me to view the complete report</summary>" >> coverage_report.txt
echo " " >> coverage_report.txt
echo "\`\`\`" >> coverage_report.txt
cat coverage.txt >> coverage_report.txt
echo "\`\`\`" >> coverage_report.txt
echo "</details>" >> coverage_report.txt
mv coverage_report.txt coverage.txt
- name: 'Comment on PR'
if: steps.unzip.outputs.hasReport == 'true'
uses: actions/github-script@v6
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
let fs = require('fs');
let issue_number = Number(fs.readFileSync('./pr_number'));
let owner = context.repo.owner;
let repo = context.repo.repo;
let run_id = context.payload.workflow_run.id;
let run_url = `https://github.com/${owner}/${repo}/actions/runs/${run_id}`
let body = fs.readFileSync('./coverage.txt', {encoding:'utf8', flag:'r'})
await github.rest.issues.createComment({
owner: owner,
repo: repo,
issue_number: issue_number,
body: body
});
name: Run ChatGPT examples
on:
pull_request:
types: [synchronize, opened, reopened]
paths:
- 'applications/Chat/coati/**'
- 'applications/Chat/requirements.txt'
- 'applications/Chat/setup.py'
- 'applications/Chat/examples/**'
jobs:
tests:
name: Run ChatGPT examples
if: |
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
runs-on: [self-hosted, gpu]
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
options: --gpus all --rm -v /data/scratch/github_actions/chat:/data/scratch/github_actions/chat
timeout-minutes: 30
defaults:
run:
shell: bash
steps:
- name: Checkout ColossalAI
uses: actions/checkout@v2
- name: Install ColossalAI and ChatGPT
run: |
pip install -e .
cd applications/Chat
pip install -v .
pip install -r examples/requirements.txt
- name: Install Transformers
run: |
cd applications/Chat
git clone https://github.com/hpcaitech/transformers
cd transformers
pip install -v .
- name: Execute Examples
run: |
cd applications/Chat
rm -rf ~/.cache/colossalai
./examples/test_ci.sh
env:
NCCL_SHM_DISABLE: 1
MAX_JOBS: 8
SFT_DATASET: /data/scratch/github_actions/chat/data.json
PROMPT_PATH: /data/scratch/github_actions/chat/prompts_en.jsonl
PRETRAIN_DATASET: /data/scratch/github_actions/chat/alpaca_data.json
name: Run ChatGPT unit tests
on:
pull_request:
types: [synchronize, opened, reopened]
paths:
- 'applications/Chat/coati/**'
- 'applications/Chat/requirements.txt'
- 'applications/Chat/setup.py'
- 'applications/Chat/requirements-test.txt'
- 'applications/Chat/tests/**'
- 'applications/Chat/pytest.ini'
jobs:
tests:
name: Run ChatGPT unit tests
if: |
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
runs-on: [self-hosted, gpu]
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
options: --gpus all --rm -v /data/scratch/chatgpt:/data/scratch/chatgpt
timeout-minutes: 30
defaults:
run:
shell: bash
steps:
- name: Checkout ColossalAI
uses: actions/checkout@v2
- name: Install ColossalAI and ChatGPT
run: |
pip install -e .
cd applications/Chat
pip install -v .
pip install -r requirements-test.txt
- name: Execute Unit Testing
run: |
cd applications/Chat
rm -rf ~/.cache/colossalai
pytest tests/
env:
NCCL_SHM_DISABLE: 1
MAX_JOBS: 8
import argparse
import os
def compare_dirs(dir1, dir2):
    """Recursively check that two directory trees have identical structure.

    Two trees match when every entry in one has a same-named entry in the
    other, directories match recursively, and files exist on both sides
    (file contents are NOT compared).

    Args:
        dir1 (str): path of the first directory tree.
        dir2 (str): path of the second directory tree.

    Returns:
        bool: True if the structures match, False otherwise. A message is
        printed for each mismatching pair of paths.
    """
    # Bail out early if either tree root is missing.
    if not (os.path.exists(dir1) and os.path.exists(dir2)):
        return False

    entries_a = os.listdir(dir1)
    entries_b = os.listdir(dir2)

    # Different entry counts means the trees cannot match.
    if len(entries_a) != len(entries_b):
        return False

    for entry in entries_a:
        path_a = os.path.join(dir1, entry)
        path_b = os.path.join(dir2, entry)

        # The same-named entry must exist on the other side.
        if not os.path.exists(path_b):
            print(f'Found mismatch: {path_a}, {path_b}')
            return False

        if os.path.isdir(path_a) and os.path.isdir(path_b):
            # Both are directories: recurse into them.
            if not compare_dirs(path_a, path_b):
                print(f'Found mismatch: {path_a}, {path_b}')
                return False
        elif os.path.isfile(path_a) and os.path.isfile(path_b):
            # Both are files: presence is enough, contents are not compared.
            continue
        else:
            # One side is a file while the other is a directory (or neither).
            print(f'Found mismatch: {path_a}, {path_b}')
            return False

    # Every entry matched.
    return True
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--directory', help="The directory where the multi-language source files are kept.")
    args = parser.parse_args()

    # Each sub-folder of the given directory is one language variant
    # (e.g. <directory>/en, <directory>/zh); build their absolute paths.
    i18n_folders = os.listdir(args.directory)
    i18n_folders = [os.path.join(args.directory, val) for val in i18n_folders]

    # Compare every other language folder against the first one; any
    # structural difference means a document is missing in some language.
    if len(i18n_folders) > 1:
        for i in range(1, len(i18n_folders)):
            dir1 = i18n_folders[0]
            dir2 = i18n_folders[i]
            print(f'comparing {dir1} vs {dir2}')
            match = compare_dirs(i18n_folders[0], i18n_folders[i])

            if not match:
                # NOTE(review): a mismatch only prints a warning and does not
                # exit non-zero, so a CI job running this script still passes
                # — confirm whether a sys.exit(1) is intended here.
                print(
                    f"{dir1} and {dir2} don't match, please ensure that your documentation is available in different languages"
                )
            else:
                print(f"{dir1} and {dir2} match")
import argparse
import os
def check_inputs(input_list):
    """Validate that every given path exists under the 'examples' directory.

    Args:
        input_list (list): relative example paths (e.g. ['language/gpt']).

    Returns:
        bool: True when all paths exist (vacuously True for an empty list),
        False as soon as one is missing.
    """
    # all() short-circuits on the first missing path, matching an early return.
    return all(os.path.exists(os.path.join('examples', candidate)) for candidate in input_list)
def main():
    """CLI entry point: validate a comma-separated list of example directories.

    Prints 'success' when every directory exists under 'examples/',
    'failure' otherwise, so the calling shell step can read the verdict.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('-f', '--fileNameList', type=str, help="List of file names")
    parsed = arg_parser.parse_args()

    # The workflow passes directories as a single comma-separated string.
    candidates = parsed.fileNameList.split(",")
    print('success' if check_inputs(candidates) else 'failure')


if __name__ == '__main__':
    main()
import os
def show_files(path, all_files):
    """Recursively collect every file path under `path` into `all_files`.

    Traversal is depth-first in os.listdir() order; directories are descended
    into, everything else is appended as a file path.

    Args:
        path (str): directory to walk.
        all_files (list): accumulator list, mutated in place.

    Returns:
        list: the same `all_files` list, for call-chaining convenience.
    """
    for entry in os.listdir(path):
        entry_path = os.path.join(path, entry)
        if os.path.isdir(entry_path):
            # Descend into sub-directories.
            show_files(entry_path, all_files)
        else:
            # Non-directories are recorded as files.
            all_files.append(entry_path)
    return all_files
def join(input_list, sep=None):
    """Join strings with `sep`, falling back to a single space when `sep`
    is None or empty (any falsy separator selects the default)."""
    delimiter = sep or ' '
    return delimiter.join(input_list)
def main():
    """Print the list of example directories exactly two levels below 'examples/'.

    An example lives at examples/<area>/<application>/...; shallower entries
    such as examples/requirements.txt or examples/images/README.md are
    skipped. Each <area>/<application> pair is reported once, in discovery
    order, via print() so the calling shell step can capture it.
    """
    discovered = show_files('examples/', [])
    example_dirs = []

    for file_path in discovered:
        parts = file_path.split('/')
        # Require at least two folder levels after 'examples' plus a file,
        # i.e. examples/images/vit/... is acceptable, examples/images/README.md is not.
        if len(parts) >= 4:
            candidate = '/'.join(parts[1:3])
            if candidate not in example_dirs:
                example_dirs.append(candidate)

    print(example_dirs)


if __name__ == '__main__':
    main()
import argparse
def main():
    """Parse a colon-separated list of changed file paths and print the
    example directories (area/application) that need re-testing.

    The expected layout is examples/<area>/<application>/<file>; only paths
    at least four segments deep under 'examples' contribute a directory.
    The result is printed so the calling shell step can capture it.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--fileNameList', type=str, help="The list of changed files")
    args = parser.parse_args()
    name_list = args.fileNameList.split(":")
    folder_need_check = set()

    for loc in name_list:
        # Find only the sub-sub-folder of 'example' folder
        # the examples folder structure is like
        # - examples
        #   - area
        #     - application
        #       - file
        parts = loc.split("/")
        if parts[0] == "examples" and len(parts) >= 4:
            folder_need_check.add('/'.join(parts[1:3]))

    # Sort before printing: iterating a raw set is order-unstable across
    # interpreter invocations (hash randomization), which made the generated
    # CI matrix nondeterministic. Output the result using print so the shell
    # can get the values.
    print(sorted(folder_need_check))


if __name__ == '__main__':
    main()
import os
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import Any, Dict, List
import matplotlib.pyplot as plt
import pytz
import requests
import seaborn
from requests_toolbelt import MultipartEncoder
@dataclass
class Contributor:
    """
    Dataclass for a github contributor.

    Args:
        name (str): name of the contributor
        num_commits_this_week (int): number of commits made within one week
    """
    # GitHub login of the contributor
    name: str
    # number of commits this contributor made in the one-week window
    num_commits_this_week: int
def plot_bar_chart(x: List[Any], y: List[Any], xlabel: str, ylabel: str, title: str, output_path: str) -> None:
    """
    This function is a utility to plot the bar charts.

    Args:
        x (List[Any]): categories plotted on the x-axis
        y (List[Any]): values plotted on the y-axis
        xlabel (str): label for the x-axis
        ylabel (str): label for the y-axis
        title (str): chart title
        output_path (str): file path the rendered figure is saved to
    """
    # start from a clean figure so consecutive calls don't overlay each other
    plt.clf()
    seaborn.color_palette()
    fig = seaborn.barplot(x=x, y=y)
    fig.set(xlabel=xlabel, ylabel=ylabel, title=title)
    # strip the top/right spines for a cleaner chart
    seaborn.despine()
    plt.tight_layout()
    # high dpi so the chart stays readable when embedded in chat/reports
    plt.savefig(output_path, dpi=1200)
def get_issue_pull_request_comments(github_token: str, since: str) -> Dict[str, int]:
    """
    Retrieve the issue/PR comments made by our members in the last 7 days.

    Args:
        github_token (str): GitHub access token for API calls
        since (str): the path parameter required by GitHub Restful APIs, in the format of YYYY-MM-DDTHH:MM:SSZ

    Returns:
        Dict[str, int]: mapping from member login to the number of qualifying
        comments — i.e. comments authored by a MEMBER on an issue/PR that was
        NOT itself created by a MEMBER.
    """
    # prepare header
    headers = {
        'Authorization': f'Bearer {github_token}',
        'Accept': 'application/vnd.github+json',
        'X-GitHub-Api-Version': '2022-11-28'
    }

    user_engagement_count = {}

    # do pagination to the API; GitHub returns an empty list past the last page
    page = 1
    while True:
        comment_api = f'https://api.github.com/repos/hpcaitech/ColossalAI/issues/comments?since={since}&page={page}'
        comment_response = requests.get(comment_api, headers=headers).json()

        if len(comment_response) == 0:
            # an empty page marks the end of the paginated results
            break
        else:
            for item in comment_response:
                comment_author_relationship = item['author_association']
                if comment_author_relationship != 'MEMBER':
                    # if the comment is not made by our member
                    # we don't count this comment towards user engagement
                    continue

                # the comments endpoint covers both issues and PRs; the issue
                # id is the last path segment of the comment's issue_url
                issue_id = item['issue_url'].split('/')[-1]
                issue_api = f'https://api.github.com/repos/hpcaitech/ColossalAI/issues/{issue_id}'
                issue_response = requests.get(issue_api, headers=headers).json()
                issue_author_relationship = issue_response['author_association']

                if issue_author_relationship != 'MEMBER':
                    # this means that the issue/PR is not created by our own people
                    # any comments in this issue/PR by our member will be counted towards the leaderboard
                    member_name = item['user']['login']

                    if member_name in user_engagement_count:
                        user_engagement_count[member_name] += 1
                    else:
                        user_engagement_count[member_name] = 1
        page += 1
    return user_engagement_count
def get_discussion_comments(github_token: str, since: datetime) -> Dict[str, int]:
    """
    Retrieve the discussion comments made by our members in the last 7 days.
    This is only available via the GitHub GraphQL API.

    Args:
        github_token (str): GitHub access token for API calls
        since (datetime): the query parameter to determine whether the comment is made this week

    Returns:
        Dict[str, int]: mapping from member login to the number of qualifying
        discussion comments and replies
    """

    # use graphql to get the discussions updated in the last 7 days
    def _generate_discussion_query(num, cursor: str = None):
        # Builds a GraphQL query that pages FORWARD through the repository's
        # discussions, `num` at a time, resuming after `cursor` when given.
        if cursor is None:
            offset_str = ""
        else:
            offset_str = f", after: \"{cursor}\""
        query = f"""
        {{
            repository(owner: "hpcaitech", name: "ColossalAI"){{
                discussions(first: {num} {offset_str}){{
                    edges {{
                        cursor
                        node{{
                            title
                            author{{
                                login
                            }}
                            number
                            authorAssociation
                            updatedAt
                        }}
                    }}
                }}
            }}
        }}
        """
        return query

    def _generate_comment_reply_count_for_discussion(discussion_number, num, cursor: str = None):
        # here we assume that each comment will not have more than 100 replies for simplicity
        # otherwise, we have to go through pagination for both comment and reply
        # NOTE: this pages BACKWARD (`last` + `before`), so the most recent
        # comments of the discussion are visited first.
        if cursor is None:
            offset_str = ""
        else:
            offset_str = f", before: \"{cursor}\""
        query = f"""
        {{
            repository(owner: "hpcaitech", name: "ColossalAI"){{
                discussion(number: {discussion_number}){{
                    title
                    comments(last: {num} {offset_str}){{
                        edges{{
                            cursor
                            node {{
                                author{{
                                    login
                                }}
                                updatedAt
                                authorAssociation
                                replies (last: 100) {{
                                    edges {{
                                        node {{
                                            author {{
                                                login
                                            }}
                                            updatedAt
                                            authorAssociation
                                        }}
                                    }}
                                }}
                            }}
                        }}
                    }}
                }}
            }}
        }}
        """
        return query

    # a utility function to make call to Github GraphQL API
    def _call_graphql_api(query):
        headers = {"Authorization": f"Bearer {github_token}"}
        json_data = {'query': query}
        response = requests.post('https://api.github.com/graphql', json=json_data, headers=headers)
        data = response.json()
        return data

    # get the discussion numbers updated in the last 7 days
    discussion_numbers = []
    num_per_request = 10
    cursor = None

    while True:
        query = _generate_discussion_query(num_per_request, cursor)
        data = _call_graphql_api(query)
        found_discussion_out_of_time_range = False

        edges = data['data']['repository']['discussions']['edges']
        if len(edges) == 0:
            break
        else:
            # keep the discussion whose author is not a member
            for edge in edges:
                # print the discussion title
                discussion = edge['node']
                # GitHub returns timestamps as UTC strings; parsed naive here,
                # matching the naive `since` produced by datetime.utcnow()
                discussion_updated_at = datetime.strptime(discussion['updatedAt'], "%Y-%m-%dT%H:%M:%SZ")

                # check if the updatedAt is within the last 7 days
                # if yes, add it to dicussion_numbers
                if discussion_updated_at > since:
                    if discussion['authorAssociation'] != 'MEMBER':
                        discussion_numbers.append(discussion['number'])
                else:
                    # NOTE(review): assumes the discussions page is ordered so
                    # that once one stale discussion appears the rest of the
                    # pages are stale too — confirm against the API's ordering
                    found_discussion_out_of_time_range = True

        if found_discussion_out_of_time_range:
            break
        else:
            # update cursor
            cursor = edges[-1]['cursor']

    # get the dicussion comments and replies made by our member
    user_engagement_count = {}
    for dicussion_number in discussion_numbers:
        cursor = None
        num_per_request = 10

        while True:
            query = _generate_comment_reply_count_for_discussion(dicussion_number, num_per_request, cursor)
            data = _call_graphql_api(query)

            # get the comments
            edges = data['data']['repository']['discussion']['comments']['edges']

            # update the cursor
            if len(edges) == 0:
                break
            else:
                # update cursor for pagination
                cursor = edges[-1]['cursor']

                for edge in edges:
                    comment = edge['node']
                    if comment['authorAssociation'] == 'MEMBER':
                        # check if the updatedAt is within the last 7 days
                        # if yes, add it to user_engagement_count
                        comment_updated_at = datetime.strptime(comment['updatedAt'], "%Y-%m-%dT%H:%M:%SZ")
                        if comment_updated_at > since:
                            member_name = comment['author']['login']
                            if member_name in user_engagement_count:
                                user_engagement_count[member_name] += 1
                            else:
                                user_engagement_count[member_name] = 1

                    # get the replies
                    reply_edges = comment['replies']['edges']
                    if len(reply_edges) == 0:
                        continue
                    else:
                        for reply_edge in reply_edges:
                            reply = reply_edge['node']
                            if reply['authorAssociation'] == 'MEMBER':
                                # check if the updatedAt is within the last 7 days
                                # if yes, add it to dicussion_numbers
                                reply_updated_at = datetime.strptime(reply['updatedAt'], "%Y-%m-%dT%H:%M:%SZ")
                                if reply_updated_at > since:
                                    member_name = reply['author']['login']
                                    if member_name in user_engagement_count:
                                        user_engagement_count[member_name] += 1
                                    else:
                                        user_engagement_count[member_name] = 1
    return user_engagement_count
def generate_user_engagement_leaderboard_image(github_token: str, output_path: str) -> bool:
    """
    Generate the user engagement leaderboard image for stats within the last 7 days

    Args:
        github_token (str): GitHub access token for API calls
        output_path (str): the path to save the image

    Returns:
        bool: True if there was any engagement and an image was saved, False otherwise
    """
    # request to the Github API to get the users who have replied the most in the last 7 days
    now = datetime.utcnow()
    start_datetime = now - timedelta(days=7)
    start_datetime_str = start_datetime.strftime("%Y-%m-%dT%H:%M:%SZ")

    # get the issue/PR comments and discussion comment count
    issue_pr_engagement_count = get_issue_pull_request_comments(github_token=github_token, since=start_datetime_str)
    discussion_engagement_count = get_discussion_comments(github_token=github_token, since=start_datetime)

    # merge the two engagement counts per member
    total_engagement_count = {}
    total_engagement_count.update(issue_pr_engagement_count)
    for name, count in discussion_engagement_count.items():
        if name in total_engagement_count:
            total_engagement_count[name] += count
        else:
            total_engagement_count[name] = count

    # prepare the data for plotting
    x = []
    y = []

    if len(total_engagement_count) > 0:
        ranking = []
        for name, count in total_engagement_count.items():
            ranking.append((name, count))
        ranking.sort(key=lambda item: item[1], reverse=True)
        for name, count in ranking:
            x.append(count)
            y.append(name)

        # display the start of the 7-day window in Shanghai time on the image
        # (bug fix: the original showed the CURRENT time, so the "since" label
        # did not match the actual statistics window)
        start_datetime_str = pytz.utc.localize(start_datetime) \
            .astimezone(pytz.timezone('Asia/Shanghai')) \
            .strftime("%Y-%m-%dT%H:%M:%SZ")

        # plot the leaderboard
        xlabel = f"Number of Comments made (since {start_datetime_str})"
        ylabel = "Member"
        title = 'Active User Engagement Leaderboard'
        plot_bar_chart(x, y, xlabel=xlabel, ylabel=ylabel, title=title, output_path=output_path)
        return True
    else:
        # no engagement this week -> nothing to plot, no image written
        return False
def generate_contributor_leaderboard_image(github_token, output_path) -> bool:
    """
    Generate the contributor leaderboard image for stats within the last 7 days

    Args:
        github_token (str): GitHub access token for API calls
        output_path (str): the path to save the image

    Returns:
        bool: True if there were commits this week and an image was saved, False otherwise
    """
    import time

    # request to the Github API to get the users who have contributed in the last 7 days
    URL = 'https://api.github.com/repos/hpcaitech/ColossalAI/stats/contributors'
    headers = {
        'Authorization': f'Bearer {github_token}',
        'Accept': 'application/vnd.github+json',
        'X-GitHub-Api-Version': '2022-11-28'
    }

    # sometimes the Github API returns an empty response while it computes
    # the statistics in the background; retry with a short pause instead of
    # hammering the endpoint in a tight loop, and give up after a bounded
    # number of attempts (the original retried forever)
    response = []
    for attempt in range(10):
        response = requests.get(URL, headers=headers).json()
        if len(response) != 0:
            break
        time.sleep(3)

    if len(response) == 0:
        # stats never became available; skip image generation gracefully
        return False

    contributor_list = []

    # get number of commits for each contributor
    start_timestamp = None
    for item in response:
        num_commits_this_week = item['weeks'][-1]['c']
        name = item['author']['login']
        contributor = Contributor(name=name, num_commits_this_week=num_commits_this_week)
        contributor_list.append(contributor)

        # update start_timestamp
        start_timestamp = item['weeks'][-1]['w']

    # convert unix timestamp to Beijing datetime
    start_datetime = datetime.fromtimestamp(start_timestamp, tz=pytz.timezone('Asia/Shanghai'))
    start_datetime_str = start_datetime.strftime("%Y-%m-%dT%H:%M:%SZ")

    # sort by number of commits
    contributor_list.sort(key=lambda x: x.num_commits_this_week, reverse=True)

    # remove contributors who has zero commits
    contributor_list = [x for x in contributor_list if x.num_commits_this_week > 0]

    # prepare the data for plotting
    x = [x.num_commits_this_week for x in contributor_list]
    y = [x.name for x in contributor_list]

    # plot
    if len(x) > 0:
        xlabel = f"Number of Commits (since {start_datetime_str})"
        ylabel = "Contributor"
        title = 'Active Contributor Leaderboard'
        plot_bar_chart(x, y, xlabel=xlabel, ylabel=ylabel, title=title, output_path=output_path)
        return True
    else:
        # nobody committed this week -> no image written
        return False
def upload_image_to_lark(lark_tenant_token: str, image_path: str) -> str:
    """
    Upload image to Lark and return the image key

    Args:
        lark_tenant_token (str): Lark tenant access token
        image_path (str): the path to the image to be uploaded

    Returns:
        str: the image key assigned by Lark, used to reference the image in messages
    """
    url = "https://open.feishu.cn/open-apis/im/v1/images"
    # open the image in a context manager so the file handle is closed even if
    # the upload request fails (the original leaked the open file handle)
    with open(image_path, 'rb') as image_file:
        form = {'image_type': 'message', 'image': image_file}
        multi_form = MultipartEncoder(form)
        headers = {
            # tenant access token obtained via generate_lark_tenant_access_token
            'Authorization': f'Bearer {lark_tenant_token}',
        }
        headers['Content-Type'] = multi_form.content_type
        response = requests.request("POST", url, headers=headers, data=multi_form).json()
    return response['data']['image_key']
def generate_lark_tenant_access_token(app_id: str, app_secret: str) -> str:
    """
    Generate Lark tenant access token.

    Args:
        app_id (str): Lark app id
        app_secret (str): Lark app secret

    Returns:
        str: the tenant access token issued by Lark
    """
    endpoint = 'https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal'
    payload = {'app_id': app_id, 'app_secret': app_secret}
    # exchange the app credentials for a short-lived tenant token
    result = requests.post(endpoint, json=payload).json()
    return result['tenant_access_token']
def send_image_to_lark(image_key: str, webhook_url: str) -> None:
    """
    Send image to Lark.

    Args:
        image_key (str): the image key returned by Lark
        webhook_url (str): the webhook url to send the image
    """
    # build the image message payload expected by the Lark webhook
    payload = {"msg_type": "image", "content": {"image_key": image_key}}
    requests.post(webhook_url, json=payload)
def send_message_to_lark(message: str, webhook_url: str):
    """
    Send message to Lark.

    Args:
        message (str): the message to be sent
        webhook_url (str): the webhook url to send the message
    """
    # build the plain-text message payload expected by the Lark webhook
    payload = {"msg_type": "text", "content": {"text": message}}
    requests.post(webhook_url, json=payload)
if __name__ == '__main__':
    GITHUB_TOKEN = os.environ['GITHUB_TOKEN']
    CONTRIBUTOR_IMAGE_PATH = 'contributor_leaderboard.png'
    USER_ENGAGEMENT_IMAGE_PATH = 'engagement_leaderboard.png'

    # generate images; each call returns False when there was no activity this
    # week, in which case no image file is written to disk
    contrib_success = generate_contributor_leaderboard_image(GITHUB_TOKEN, CONTRIBUTOR_IMAGE_PATH)
    engagement_success = generate_user_engagement_leaderboard_image(GITHUB_TOKEN, USER_ENGAGEMENT_IMAGE_PATH)

    # prepare Lark credentials
    APP_ID = os.environ['LARK_APP_ID']
    APP_SECRET = os.environ['LARK_APP_SECRET']
    LARK_TENANT_TOKEN = generate_lark_tenant_access_token(app_id=APP_ID, app_secret=APP_SECRET)

    # upload images only when they were actually generated
    # (bug fix: the original uploaded unconditionally, which raised
    # FileNotFoundError whenever a leaderboard had no data this week)
    contributor_image_key = None
    user_engagement_image_key = None
    if contrib_success:
        contributor_image_key = upload_image_to_lark(LARK_TENANT_TOKEN, CONTRIBUTOR_IMAGE_PATH)
    if engagement_success:
        user_engagement_image_key = upload_image_to_lark(LARK_TENANT_TOKEN, USER_ENGAGEMENT_IMAGE_PATH)

    # send message to lark
    LARK_WEBHOOK_URL = os.environ['LARK_WEBHOOK_URL']
    message = """本周的社区榜单出炉啦!
1. 开发贡献者榜单
2. 用户互动榜单
注:
- 开发贡献者测评标准为:本周由公司成员提交的commit次数
- 用户互动榜单测评标准为:本周由公司成员在非成员创建的issue/PR/discussion中回复的次数
"""
    send_message_to_lark(message, LARK_WEBHOOK_URL)

    # send contributor image to lark
    if contrib_success:
        send_image_to_lark(contributor_image_key, LARK_WEBHOOK_URL)
    else:
        send_message_to_lark("本周没有成员贡献commit,无榜单图片生成。", LARK_WEBHOOK_URL)

    # send user engagement image to lark
    if engagement_success:
        send_image_to_lark(user_engagement_image_key, LARK_WEBHOOK_URL)
    else:
        send_message_to_lark("本周没有成员互动,无榜单图片生成。", LARK_WEBHOOK_URL)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment