# Commit 7bc5a8e3 authored by zhuwenwen
# Browse files
# parents e6748d82 0f785cb1
name: Test Documentation on PR
on:
pull_request:
# any change in the examples folder will trigger check for the corresponding example.
paths:
- 'docs/source/**.md'
jobs:
# This is for changed example files detect and output a matrix containing all the corresponding directory name.
detect-changed-doc:
if: |
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' && github.event_name == 'pull_request'
runs-on: ubuntu-latest
outputs:
any_changed: ${{ steps.changed-files.outputs.any_changed }}
changed_files: ${{ steps.changed-files.outputs.all_changed_files }}
name: Detect changed example files
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
ref: ${{ github.event.pull_request.head.sha }}
- name: Locate base commit
id: locate-base-sha
run: |
curBranch=$(git rev-parse --abbrev-ref HEAD)
commonCommit=$(git merge-base origin/main $curBranch)
echo $commonCommit
echo "baseSHA=$commonCommit" >> $GITHUB_OUTPUT
- name: Get all changed example files
id: changed-files
uses: tj-actions/changed-files@v35
with:
base_sha: ${{ steps.locate-base-sha.outputs.baseSHA }}
files: |
./docs/source/**/*.md
# If no file is changed, it will prompt an error and shows the matrix do not have value.
check-changed-doc:
  # Add this condition to avoid executing this job if the trigger event is workflow_dispatch.
  if: |
    github.event.pull_request.draft == false &&
    github.base_ref == 'main' &&
    github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' && github.event_name == 'pull_request' &&
    needs.detect-changed-doc.outputs.any_changed == 'true'
  name: Test the changed Doc
  needs: detect-changed-doc
  runs-on: [self-hosted, gpu]
  container:
    image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
    options: --gpus all --rm
  timeout-minutes: 20
  defaults:
    run:
      shell: bash
  steps:
    - name: Checkout ColossalAI-Documentation
      uses: actions/checkout@v2
      with:
        path: './ColossalAI-Documentation'
        repository: 'hpcaitech/ColossalAI-Documentation'
    - name: Install Docer
      run: |
        pip install -v ./ColossalAI-Documentation/doc-build/third_party/hf-doc-builder
        pip install -v ./ColossalAI-Documentation/doc-build
    - name: Checkout ColossalAI
      uses: actions/checkout@v3
    - name: Install Doc Test Requirements
      run: |
        source activate pytorch
        conda env update --file docs/conda-doc-test-deps.yml --prune
        pip install -r docs/requirements-doc-test.txt
    - name: Install ColossalAI
      run: |
        source activate pytorch
        pip install -v .
    - name: Test the Doc
      run: |
        source activate pytorch
        # steps from another job are not addressable via the 'steps' context;
        # the changed-file list must come through the detect-changed-doc job's
        # declared 'changed_files' output (the original expanded to an empty
        # string, so this loop silently never ran).
        for file in ${{ needs.detect-changed-doc.outputs.changed_files }}; do
          echo "Testing $file now..."
          docer test -p $file
        done
      env:
        NCCL_SHM_DISABLE: 1
name: Test Documentation on Schedule
on:
# run at 07:00 of every Sunday(singapore time) so here is UTC time Saturday 23:00
schedule:
- cron: '0 23 * * 6'
workflow_dispatch:
jobs:
check-changed-doc:
# Add this condition to avoid executing this job if the trigger event is workflow_dispatch.
if: github.repository == 'hpcaitech/ColossalAI'
name: Test the changed Doc
runs-on: [self-hosted, gpu]
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
options: --gpus all --rm
timeout-minutes: 60
steps:
- name: Checkout ColossalAI-Documentation
uses: actions/checkout@v2
with:
path: './ColossalAI-Documentation'
repository: 'hpcaitech/ColossalAI-Documentation'
- name: Install Docer
run: |
pip install -v ./ColossalAI-Documentation/doc-build/third_party/hf-doc-builder
pip install -v ./ColossalAI-Documentation/doc-build
- name: Checkout ColossalAI
uses: actions/checkout@v3
- name: Install ColossalAI
run: |
pip install -v .
- name: Install Doc Test Requirements
run: |
pip install -r docs/requirements-doc-test.txt
- name: Test the Doc
run: |
for file in $(find ./docs/source -name "*.md"); do
docer test -p $file
done
env:
NCCL_SHM_DISABLE: 1
name: Draft GitHub Release Post
on:
workflow_dispatch:
pull_request:
paths:
- 'version.txt'
types:
- closed
jobs:
release:
  name: Draft Release Post
  if: ( github.event_name == 'workflow_dispatch' || github.event.pull_request.merged == true ) && github.repository == 'hpcaitech/ColossalAI'
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v2
      with:
        fetch-depth: 0
    - uses: actions/setup-python@v2
      with:
        python-version: '3.8.14'
    - name: generate draft
      id: generate_draft
      run: |
        version=v$(cat version.txt)
        pip install requests
        python ./.github/workflows/scripts/generate_release_draft.py --out $PWD/release_draft.md --version $version
        # '::set-output' is deprecated and disabled on current runners;
        # write step outputs through $GITHUB_OUTPUT instead (matches the
        # convention used by the other workflows in this repo).
        echo "version=$version" >> $GITHUB_OUTPUT
        echo "path=$PWD/release_draft.md" >> $GITHUB_OUTPUT
      env:
        GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Create Release
      id: create_release
      uses: actions/create-release@v1
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      with:
        tag_name: ${{ steps.generate_draft.outputs.version }}
        release_name: Version ${{ steps.generate_draft.outputs.version }} Release Today!
        body_path: ${{ steps.generate_draft.outputs.path }}
        # lowercase boolean: 'True' is a YAML 1.1 spelling rejected by strict parsers
        draft: true
        prerelease: false
name: Test Example on Dispatch
on:
workflow_dispatch:
inputs:
example_directory:
type: string
description: example directory, separated by space. For example, language/gpt, images/vit. Simply input language or simply gpt does not work.
required: true
jobs:
matrix_preparation:
  # This workflow only triggers on workflow_dispatch, so the pull_request
  # context is never populated; gating on pull_request fields made this job
  # unrunnable (null == false evaluates to false). Keep only the repo guard.
  if: github.repository == 'hpcaitech/ColossalAI'
  name: Check the examples user want
  runs-on: ubuntu-latest
  outputs:
    matrix: ${{ steps.set-matrix.outputs.matrix }}
  steps:
    - name: 📚 Checkout
      uses: actions/checkout@v3
    - name: Set up matrix
      id: set-matrix
      env:
        check_dir: ${{ inputs.example_directory }}
      run: |
        res=`python .github/workflows/scripts/example_checks/check_dispatch_inputs.py --fileNameList $check_dir`
        # compare the variable's value ("$res"), not the literal string 'res';
        # the original test could never be true, so bad input was not rejected
        if [ "$res" == "failure" ]; then
          exit 1
        fi
        dirs="[${check_dir}]"
        echo "Testing examples in $dirs"
        echo "matrix={\"directory\":$(echo "$dirs")}" >> $GITHUB_OUTPUT
test_example:
  # workflow_dispatch-only workflow: pull_request context is never available,
  # so only the repository guard is meaningful here.
  if: github.repository == 'hpcaitech/ColossalAI'
  name: Manually check example files
  # must reference the job id actually declared in this workflow
  # ('matrix_preparation'); 'manual_check_matrix_preparation' does not exist,
  # which made the whole workflow file invalid.
  needs: matrix_preparation
  runs-on: [self-hosted, gpu]
  strategy:
    fail-fast: false
    matrix: ${{ fromJson(needs.matrix_preparation.outputs.matrix) }}
  container:
    image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
    options: --gpus all --rm -v /data/scratch/examples-data:/data/
  timeout-minutes: 10
  steps:
    - name: 📚 Checkout
      uses: actions/checkout@v3
    - name: Install Colossal-AI
      run: |
        pip install -v .
    - name: Test the example
      run: |
        dir=${{ matrix.directory }}
        echo "Testing ${dir} now"
        cd "${PWD}/examples/${dir}"
        bash test_ci.sh
      env:
        NCCL_SHM_DISABLE: 1
name: Test Example on PR
on:
pull_request:
# any change in the examples folder will trigger check for the corresponding example.
paths:
- 'examples/**'
jobs:
# This is for changed example files detect and output a matrix containing all the corresponding directory name.
detect-changed-example:
if: |
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' && github.event_name == 'pull_request'
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.setup-matrix.outputs.matrix }}
anyChanged: ${{ steps.setup-matrix.outputs.anyChanged }}
name: Detect changed example files
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
ref: ${{ github.event.pull_request.head.sha }}
- name: Locate base commit
id: locate-base-sha
run: |
curBranch=$(git rev-parse --abbrev-ref HEAD)
commonCommit=$(git merge-base origin/main $curBranch)
echo $commonCommit
echo "baseSHA=$commonCommit" >> $GITHUB_OUTPUT
- name: Get all changed example files
id: changed-files
uses: tj-actions/changed-files@v35
with:
base_sha: ${{ steps.locate-base-sha.outputs.baseSHA }}
- name: setup matrix
id: setup-matrix
run: |
changedFileName=""
for file in ${{ steps.changed-files.outputs.all_changed_files }}; do
changedFileName="${file}:${changedFileName}"
done
echo "$changedFileName was changed"
res=`python .github/workflows/scripts/example_checks/detect_changed_example.py --fileNameList $changedFileName`
echo "All changed examples are $res"
if [ "$res" == "[]" ]; then
echo "anyChanged=false" >> $GITHUB_OUTPUT
echo "matrix=null" >> $GITHUB_OUTPUT
else
dirs=$( IFS=',' ; echo "${res[*]}" )
echo "anyChanged=true" >> $GITHUB_OUTPUT
echo "matrix={\"directory\":$(echo "$dirs")}" >> $GITHUB_OUTPUT
fi
# If no file is changed, it will prompt an error and shows the matrix do not have value.
check-changed-example:
# Add this condition to avoid executing this job if the trigger event is workflow_dispatch.
if: |
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' && github.event_name == 'pull_request' &&
needs.detect-changed-example.outputs.anyChanged == 'true'
name: Test the changed example
needs: detect-changed-example
runs-on: [self-hosted, gpu]
strategy:
fail-fast: false
matrix: ${{fromJson(needs.detect-changed-example.outputs.matrix)}}
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
options: --gpus all --rm -v /data/scratch/examples-data:/data/
timeout-minutes: 10
steps:
- uses: actions/checkout@v3
- name: Install Colossal-AI
run: |
pip install -v .
- name: Test the example
run: |
example_dir=${{ matrix.directory }}
cd "${PWD}/examples/${example_dir}"
bash test_ci.sh
env:
NCCL_SHM_DISABLE: 1
name: Test Example on Schedule
on:
# run at 00:00 of every Sunday(singapore time) so here is UTC time Saturday 16:00
schedule:
- cron: '0 16 * * 6'
workflow_dispatch:
jobs:
# This is for all files' weekly check. Specifically, this job is to find all the directories.
matrix_preparation:
if: github.repository == 'hpcaitech/ColossalAI'
name: Prepare matrix for weekly check
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.setup-matrix.outputs.matrix }}
steps:
- name: 📚 Checkout
uses: actions/checkout@v3
- name: setup matrix
id: setup-matrix
run: |
res=`python .github/workflows/scripts/example_checks/check_example_weekly.py`
all_loc=$( IFS=',' ; echo "${res[*]}" )
echo "Found the examples: $all_loc"
echo "matrix={\"directory\":$(echo "$all_loc")}" >> $GITHUB_OUTPUT
weekly_check:
if: github.repository == 'hpcaitech/ColossalAI'
name: Weekly check all examples
needs: matrix_preparation
runs-on: [self-hosted, gpu]
strategy:
fail-fast: false
matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}}
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
timeout-minutes: 10
steps:
- name: 📚 Checkout
uses: actions/checkout@v3
- name: Install Colossal-AI
run: |
pip install -v .
- name: Traverse all files
run: |
example_dir=${{ matrix.directory }}
echo "Testing ${example_dir} now"
cd "${PWD}/examples/${example_dir}"
bash test_ci.sh
env:
NCCL_SHM_DISABLE: 1
- name: Notify Lark
id: message-preparation
if: ${{ failure() }}
run: |
url=$SERVER_URL/$REPO/actions/runs/$RUN_ID
msg="Example tests failed for $EXAMPLE_DIR, please visit $url for details"
echo $msg
python .github/workflows/scripts/send_message_to_lark.py -m "$msg" -u $WEBHOOK_URL
env:
SERVER_URL: ${{github.server_url }}
REPO: ${{ github.repository }}
RUN_ID: ${{ github.run_id }}
WEBHOOK_URL: ${{ secrets.LARK_NOTIFICATION_WEBHOOK_URL }}
EXAMPLE_DIR: ${{ matrix.directory }}
name: post-commit
on:
pull_request:
types:
- closed
jobs:
# this job will run after a PR is merged to run pre-commit on any changed file
# so that the user does not need to learn pre-commit and pre-commit can still
# be auto-executed by the workflow
pre-commit:
runs-on: ubuntu-latest
if: github.event.pull_request.merged == true && github.repository == 'hpcaitech/ColossalAI'
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0
ref: ${{ github.event.pull_request.head.sha }}
# the PR branch and the hpcaitech/colossal-ai main branch
# must share a common commit, we need to locate that commit,
# which is the commit checked-out or forked when the PR branch is created
# such that we can look for files changed since that commit
- name: Locate base commit
id: locate-base-sha
run: |
curBranch=$(git rev-parse --abbrev-ref HEAD)
commonCommit=$(git merge-base origin/main $curBranch)
echo $commonCommit
echo "baseSHA=$commonCommit" >> $GITHUB_OUTPUT
- name: Find the changed files
id: find-changed-files
uses: tj-actions/changed-files@v35
with:
base_sha: ${{ steps.locate-base-sha.outputs.baseSHA }}
- name: List all changed files
run: |
for file in ${{ steps.find-changed-files.outputs.all_changed_files }}; do
echo "$file was changed"
done
# check out the main branch
- uses: actions/checkout@v2
with:
ref: 'main'
- uses: actions/setup-python@v3
- name: Cache pre-commit hooks
uses: actions/cache@v3
with:
path: ~/.cache/pre-commit
key: ${{ runner.os }}-pre-commit-hooks
- name: Set up pre-commit
run: |
pip install pre-commit
pre-commit install
# run pre-commit on changed files
- name: Run Pre-commit
run: |
for file in ${{ steps.find-changed-files.outputs.all_changed_files }}; do
pre-commit run --files $file || true
done
# create commit for pre-commit
# when all files are well formatted, there is no need to create a commit
# therefore, this step will produce an error, which should be allowed
- name: Create commits
id: commit
continue-on-error: true
run: |
git config --global user.name 'github-actions'
git config --global user.email 'github-actions@github.com'
git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}
git add -A
git commit -am "[format] applied code formatting on changed files in pull request ${{ github.event.pull_request.number }}"
# create pull request
- name: Create Pull Request
if: steps.commit.outcome == 'success'
id: cpr
uses: peter-evans/create-pull-request@v4
with:
branch: pre-commit-${{ github.event.pull_request.number }}
title: "[format] applied code formatting on changed files in PR ${{ github.event.pull_request.number }}"
- name: Enable Auto-merge for the New PR
if: steps.commit.outcome == 'success'
uses: peter-evans/enable-pull-request-automerge@v2
with:
pull-request-number: ${{ steps.cpr.outputs.pull-request-number }}
merge-method: squash
name: Publish Docker Image to DockerHub after Merge
on:
workflow_dispatch:
pull_request:
paths:
- 'version.txt'
types:
- closed
jobs:
release:
name: Publish Docker Image to DockerHub
if: ( github.event_name == 'workflow_dispatch' || github.event.pull_request.merged == true ) && github.repository == 'hpcaitech/ColossalAI'
runs-on: [self-hosted, gpu]
container:
image: "hpcaitech/docker-in-docker:latest"
options: --gpus all --rm -v /var/run/docker.sock:/var/run/docker.sock
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Build Docker
id: build
run: |
version=$(cat version.txt)
tag=hpcaitech/colossalai:$version
docker build --build-arg http_proxy=http://172.17.0.1:7890 --build-arg https_proxy=http://172.17.0.1:7890 -t $tag ./docker
echo "tag=${tag}" >> $GITHUB_OUTPUT
- name: Log in to Docker Hub
uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Push Docker image
id: docker-push
run: |
docker push ${{ steps.build.outputs.tag }}
notify:
name: Notify Lark via webhook
needs: release
runs-on: ubuntu-latest
if: ${{ always() }}
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: '3.8.14'
- name: Install requests
run: pip install requests
- name: Notify Lark
id: message-preparation
run: |
url=$SERVER_URL/$REPO/actions/runs/$RUN_ID
if [ "$STATUS" == 'success' ]
then
msg="The Docker image for the latest release has been successfully built and pushed to DockerHub."
else
msg="Failed to build and push the Docker image for the latest release, please visit $url for details."
fi
echo $msg
python .github/workflows/scripts/send_message_to_lark.py -m "$msg" -u $WEBHOOK_URL
env:
SERVER_URL: ${{github.server_url }}
REPO: ${{ github.repository }}
RUN_ID: ${{ github.run_id }}
WEBHOOK_URL: ${{ secrets.LARK_NOTIFICATION_WEBHOOK_URL }}
STATUS: ${{ needs.release.result }}
name: Publish Nightly Version to PyPI
on:
workflow_dispatch:
schedule:
- cron: '0 0 * * 6' # release on every Saturday 00:00 UTC time (cron day 6 = Saturday)
jobs:
build-n-publish:
if: github.repository == 'hpcaitech/ColossalAI'
name: Build and publish Python 🐍 distributions 📦 to PyPI
runs-on: ubuntu-latest
timeout-minutes: 20
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: '3.8.14'
- run: NIGHTLY=1 python setup.py sdist build
# publish to PyPI if executed on the main branch
- name: Publish package to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
id: publish
with:
user: __token__
password: ${{ secrets.PYPI_API_TOKEN }}
verbose: true
notify:
  name: Notify Lark via webhook
  needs: build-n-publish
  runs-on: ubuntu-latest
  # keep the whole condition inside ONE expression: the original
  # '${{ always() }} && github.repository == ...' rendered to the literal
  # string 'true && github.repository == ...', which is always truthy,
  # so the repository guard was never applied.
  if: always() && github.repository == 'hpcaitech/ColossalAI'
  steps:
    - uses: actions/checkout@v2
    - uses: actions/setup-python@v2
      with:
        python-version: '3.8.14'
    - name: Install requests
      run: pip install requests
    - name: Notify Lark
      id: message-preparation
      run: |
        url=$SERVER_URL/$REPO/actions/runs/$RUN_ID
        # quote $STATUS so an empty value cannot break the test syntax
        if [ "$STATUS" == 'success' ]
        then
          msg="The Colossal-AI nightly version has been successfully released to PyPI."
        else
          msg="Failed to release Colossal-AI nightly version to PyPI, please visit $url for details."
        fi
        echo $msg
        python .github/workflows/scripts/send_message_to_lark.py -m "$msg" -u $WEBHOOK_URL
      env:
        SERVER_URL: ${{ github.server_url }}
        REPO: ${{ github.repository }}
        RUN_ID: ${{ github.run_id }}
        WEBHOOK_URL: ${{ secrets.LARK_NOTIFICATION_WEBHOOK_URL }}
        # 'steps.publish' lives in the build-n-publish job and is not visible
        # here; use the job-level result, as the sibling notify jobs do.
        STATUS: ${{ needs.build-n-publish.result }}
name: Publish to PyPI
on:
workflow_dispatch:
pull_request:
paths:
- 'version.txt'
types:
- closed
jobs:
build-n-publish:
  # parenthesize so the repository check applies to manual dispatches too:
  # '&&' binds tighter than '||', so the original condition let a
  # workflow_dispatch on any fork reach the publish step.
  if: (github.event_name == 'workflow_dispatch' || (github.event.pull_request.merged == true && github.base_ref == 'main')) && github.repository == 'hpcaitech/ColossalAI'
  name: Build and publish Python 🐍 distributions 📦 to PyPI
  runs-on: ubuntu-latest
  timeout-minutes: 20
  steps:
    - uses: actions/checkout@v2
    - uses: actions/setup-python@v2
      with:
        python-version: '3.8.14'
    - run: python setup.py sdist build
    # publish to PyPI if executed on the main branch
    - name: Publish package to PyPI
      id: publish
      uses: pypa/gh-action-pypi-publish@release/v1
      with:
        user: __token__
        password: ${{ secrets.PYPI_API_TOKEN }}
        verbose: true
notify:
name: Notify Lark via webhook
needs: build-n-publish
runs-on: ubuntu-latest
if: ${{ always() }}
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: '3.8.14'
- name: Install requests
run: pip install requests
- name: Notify Lark
id: message-preparation
run: |
url=$SERVER_URL/$REPO/actions/runs/$RUN_ID
if [ "$STATUS" == 'success' ]
then
msg="The Colossal-AI latest version has been successfully released to PyPI."
else
msg="Failed to release Colossal-AI to PyPI, please visit $url for details."
fi
echo $msg
python .github/workflows/scripts/send_message_to_lark.py -m "$msg" -u $WEBHOOK_URL
env:
SERVER_URL: ${{github.server_url }}
REPO: ${{ github.repository }}
RUN_ID: ${{ github.run_id }}
WEBHOOK_URL: ${{ secrets.LARK_NOTIFICATION_WEBHOOK_URL }}
STATUS: ${{ needs.build-n-publish.result }}
name: Publish to Test-PyPI Before Merge
on:
pull_request:
paths:
- 'version.txt'
jobs:
build-n-publish:
if: github.event_name == 'workflow_dispatch' || github.repository == 'hpcaitech/ColossalAI'
name: Build and publish Python 🐍 distributions 📦 to Test PyPI
runs-on: ubuntu-latest
timeout-minutes: 20
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: '3.8.14'
- name: add timestamp to the version
id: prep-version
run: |
version=$(cat version.txt)
timestamp=$(date +%s)
new_version="${version}.post${timestamp}"
echo $new_version > ./version.txt
echo "version=$new_version" >> $GITHUB_OUTPUT
- run: python setup.py sdist build
# publish to PyPI if executed on the main branch
- name: Publish package to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
user: __token__
password: ${{ secrets.TEST_PYPI_API_TOKEN }}
repository_url: https://test.pypi.org/legacy/
verbose: true
- name: Wait for Test-PyPI refresh
run: sleep 300s
shell: bash
- name: Try installation
run: |
# we need to install the requirements.txt first
# as test-pypi may not contain the distributions for libs listed in the txt file
pip install -r requirements/requirements.txt
pip install --index-url https://test.pypi.org/simple/ colossalai==$VERSION
env:
VERSION: ${{ steps.prep-version.outputs.version }}
name: Generate Community Report and Send to Lark
on:
workflow_dispatch:
schedule:
# release on every Friday 09:00 UTC time, 17:00 Beijing/Singapore time
- cron: '0 9 * * 5'
jobs:
generate-and-publish:
if: github.repository == 'hpcaitech/ColossalAI'
name: Generate leaderboard report and publish to Lark
runs-on: ubuntu-latest
timeout-minutes: 20
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: '3.8.14'
- run: pip install requests matplotlib seaborn requests_toolbelt pytz
- run: python .github/workflows/scripts/generate_leaderboard_and_send_to_lark.py
env:
LARK_APP_ID: ${{ secrets.LARK_LEADERBOARD_APP_ID }}
LARK_APP_SECRET: ${{ secrets.LARK_LEADERBOARD_APP_SECRET }}
LARK_WEBHOOK_URL: ${{ secrets.LARK_NOTIFICATION_WEBHOOK_URL }}
GITHUB_TOKEN: ${{ github.token }}
name: Report Test Coverage
on:
workflow_run:
workflows: [Build on PR]
types:
- completed
jobs:
report-test-coverage:
runs-on: ubuntu-latest
steps:
- name: 'Download artifact'
uses: actions/github-script@v6
with:
script: |
let allArtifacts = await github.rest.actions.listWorkflowRunArtifacts({
owner: context.repo.owner,
repo: context.repo.repo,
run_id: context.payload.workflow_run.id,
});
let matchArtifact = allArtifacts.data.artifacts.filter((artifact) => {
return artifact.name == "report"
})[0];
let download = await github.rest.actions.downloadArtifact({
owner: context.repo.owner,
repo: context.repo.repo,
artifact_id: matchArtifact.id,
archive_format: 'zip',
});
let fs = require('fs');
fs.writeFileSync(`${process.env.GITHUB_WORKSPACE}/report.zip`, Buffer.from(download.data));
- name: 'Unzip artifact'
id: unzip
run: |
unzip report.zip
if [ -f "coverage.txt" ]; then
echo "hasReport=true" >> $GITHUB_OUTPUT
else
echo "hasReport=false" >> $GITHUB_OUTPUT
fi
- name: Make Coverage Report Collapsable
if: steps.unzip.outputs.hasReport == 'true'
run: |
covNum=$(cat cov_number)
title="The code coverage for the changed files is ${covNum}%."
touch coverage_report.txt
echo $title >> coverage_report.txt
echo " " >> coverage_report.txt
echo "<details>" >> coverage_report.txt
echo "<summary>Click me to view the complete report</summary>" >> coverage_report.txt
echo " " >> coverage_report.txt
echo "\`\`\`" >> coverage_report.txt
cat coverage.txt >> coverage_report.txt
echo "\`\`\`" >> coverage_report.txt
echo "</details>" >> coverage_report.txt
mv coverage_report.txt coverage.txt
- name: 'Comment on PR'
if: steps.unzip.outputs.hasReport == 'true'
uses: actions/github-script@v6
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
let fs = require('fs');
let issue_number = Number(fs.readFileSync('./pr_number'));
let owner = context.repo.owner;
let repo = context.repo.repo;
let run_id = context.payload.workflow_run.id;
let run_url = `https://github.com/${owner}/${repo}/actions/runs/${run_id}`
let body = fs.readFileSync('./coverage.txt', {encoding:'utf8', flag:'r'})
await github.rest.issues.createComment({
owner: owner,
repo: repo,
issue_number: issue_number,
body: body
});
name: Run ChatGPT examples
on:
pull_request:
types: [synchronize, opened, reopened]
paths:
- 'applications/Chat/coati/**'
- 'applications/Chat/requirements.txt'
- 'applications/Chat/setup.py'
- 'applications/Chat/examples/**'
jobs:
tests:
name: Run ChatGPT examples
if: |
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
runs-on: [self-hosted, gpu]
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
options: --gpus all --rm -v /data/scratch/github_actions/chat:/data/scratch/github_actions/chat
timeout-minutes: 30
defaults:
run:
shell: bash
steps:
- name: Checkout ColossalAI
uses: actions/checkout@v2
- name: Install ColossalAI and ChatGPT
run: |
pip install -e .
cd applications/Chat
pip install -v .
pip install -r examples/requirements.txt
- name: Install Transformers
run: |
cd applications/Chat
git clone https://github.com/hpcaitech/transformers
cd transformers
pip install -v .
- name: Execute Examples
run: |
cd applications/Chat
rm -rf ~/.cache/colossalai
./examples/test_ci.sh
env:
NCCL_SHM_DISABLE: 1
MAX_JOBS: 8
SFT_DATASET: /data/scratch/github_actions/chat/data.json
PROMPT_PATH: /data/scratch/github_actions/chat/prompts_en.jsonl
PRETRAIN_DATASET: /data/scratch/github_actions/chat/alpaca_data.json
name: Run ChatGPT unit tests
on:
pull_request:
types: [synchronize, opened, reopened]
paths:
- 'applications/Chat/coati/**'
- 'applications/Chat/requirements.txt'
- 'applications/Chat/setup.py'
- 'applications/Chat/requirements-test.txt'
- 'applications/Chat/tests/**'
- 'applications/Chat/pytest.ini'
jobs:
tests:
name: Run ChatGPT unit tests
if: |
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
runs-on: [self-hosted, gpu]
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
options: --gpus all --rm -v /data/scratch/chatgpt:/data/scratch/chatgpt
timeout-minutes: 30
defaults:
run:
shell: bash
steps:
- name: Checkout ColossalAI
uses: actions/checkout@v2
- name: Install ColossalAI and ChatGPT
run: |
pip install -e .
cd applications/Chat
pip install -v .
pip install -r requirements-test.txt
- name: Execute Unit Testing
run: |
cd applications/Chat
rm -rf ~/.cache/colossalai
pytest tests/
env:
NCCL_SHM_DISABLE: 1
MAX_JOBS: 8
import argparse
import os
def compare_dirs(dir1, dir2):
    """Recursively check that two directory trees have identical structure.

    Two trees match when every entry in one has a same-named entry in the
    other, directories match recursively, and files exist on both sides
    (file contents are NOT compared).

    Args:
        dir1 (str): path of the first directory tree.
        dir2 (str): path of the second directory tree.

    Returns:
        bool: True if the structures match, False otherwise. A message is
        printed for each mismatching pair of paths.
    """
    # Bail out early if either tree root is missing.
    if not (os.path.exists(dir1) and os.path.exists(dir2)):
        return False

    entries_a = os.listdir(dir1)
    entries_b = os.listdir(dir2)

    # Different entry counts means the trees cannot match.
    if len(entries_a) != len(entries_b):
        return False

    for entry in entries_a:
        path_a = os.path.join(dir1, entry)
        path_b = os.path.join(dir2, entry)

        # The same-named entry must exist on the other side.
        if not os.path.exists(path_b):
            print(f'Found mismatch: {path_a}, {path_b}')
            return False

        if os.path.isdir(path_a) and os.path.isdir(path_b):
            # Both are directories: recurse into them.
            if not compare_dirs(path_a, path_b):
                print(f'Found mismatch: {path_a}, {path_b}')
                return False
        elif os.path.isfile(path_a) and os.path.isfile(path_b):
            # Both are files: presence is enough, contents are not compared.
            continue
        else:
            # One side is a file while the other is a directory (or neither).
            print(f'Found mismatch: {path_a}, {path_b}')
            return False

    # Every entry matched.
    return True
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--directory', help="The directory where the multi-language source files are kept.")
    args = parser.parse_args()

    # Each sub-folder of the given directory is one language variant
    # (e.g. <directory>/en, <directory>/zh); build their absolute paths.
    i18n_folders = os.listdir(args.directory)
    i18n_folders = [os.path.join(args.directory, val) for val in i18n_folders]

    # Compare every other language folder against the first one; any
    # structural difference means a document is missing in some language.
    if len(i18n_folders) > 1:
        for i in range(1, len(i18n_folders)):
            dir1 = i18n_folders[0]
            dir2 = i18n_folders[i]
            print(f'comparing {dir1} vs {dir2}')
            match = compare_dirs(i18n_folders[0], i18n_folders[i])

            if not match:
                # NOTE(review): a mismatch only prints a warning and does not
                # exit non-zero, so a CI job running this script still passes
                # — confirm whether a sys.exit(1) is intended here.
                print(
                    f"{dir1} and {dir2} don't match, please ensure that your documentation is available in different languages"
                )
            else:
                print(f"{dir1} and {dir2} match")
import argparse
import os
def check_inputs(input_list):
    """Validate that every given path exists under the 'examples' directory.

    Args:
        input_list (list): relative example paths (e.g. ['language/gpt']).

    Returns:
        bool: True when all paths exist (vacuously True for an empty list),
        False as soon as one is missing.
    """
    # all() short-circuits on the first missing path, matching an early return.
    return all(os.path.exists(os.path.join('examples', candidate)) for candidate in input_list)
def main():
    """CLI entry point: validate a comma-separated list of example directories.

    Prints 'success' when every directory exists under 'examples/',
    'failure' otherwise, so the calling shell step can read the verdict.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('-f', '--fileNameList', type=str, help="List of file names")
    parsed = arg_parser.parse_args()

    # The workflow passes directories as a single comma-separated string.
    candidates = parsed.fileNameList.split(",")
    print('success' if check_inputs(candidates) else 'failure')


if __name__ == '__main__':
    main()
import os
def show_files(path, all_files):
    """Recursively collect every file path under `path` into `all_files`.

    Traversal is depth-first in os.listdir() order; directories are descended
    into, everything else is appended as a file path.

    Args:
        path (str): directory to walk.
        all_files (list): accumulator list, mutated in place.

    Returns:
        list: the same `all_files` list, for call-chaining convenience.
    """
    for entry in os.listdir(path):
        entry_path = os.path.join(path, entry)
        if os.path.isdir(entry_path):
            # Descend into sub-directories.
            show_files(entry_path, all_files)
        else:
            # Non-directories are recorded as files.
            all_files.append(entry_path)
    return all_files
def join(input_list, sep=None):
    """Join strings with `sep`, falling back to a single space when `sep`
    is None or empty (any falsy separator selects the default)."""
    delimiter = sep or ' '
    return delimiter.join(input_list)
def main():
    """Print the list of example directories exactly two levels below 'examples/'.

    An example lives at examples/<area>/<application>/...; shallower entries
    such as examples/requirements.txt or examples/images/README.md are
    skipped. Each <area>/<application> pair is reported once, in discovery
    order, via print() so the calling shell step can capture it.
    """
    discovered = show_files('examples/', [])
    example_dirs = []

    for file_path in discovered:
        parts = file_path.split('/')
        # Require at least two folder levels after 'examples' plus a file,
        # i.e. examples/images/vit/... is acceptable, examples/images/README.md is not.
        if len(parts) >= 4:
            candidate = '/'.join(parts[1:3])
            if candidate not in example_dirs:
                example_dirs.append(candidate)

    print(example_dirs)


if __name__ == '__main__':
    main()
import argparse
def main():
    """Parse a colon-separated list of changed file paths and print the
    example directories (area/application) that need re-testing.

    The expected layout is examples/<area>/<application>/<file>; only paths
    at least four segments deep under 'examples' contribute a directory.
    The result is printed so the calling shell step can capture it.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--fileNameList', type=str, help="The list of changed files")
    args = parser.parse_args()
    name_list = args.fileNameList.split(":")
    folder_need_check = set()

    for loc in name_list:
        # Find only the sub-sub-folder of 'example' folder
        # the examples folder structure is like
        # - examples
        #   - area
        #     - application
        #       - file
        parts = loc.split("/")
        if parts[0] == "examples" and len(parts) >= 4:
            folder_need_check.add('/'.join(parts[1:3]))

    # Sort before printing: iterating a raw set is order-unstable across
    # interpreter invocations (hash randomization), which made the generated
    # CI matrix nondeterministic. Output the result using print so the shell
    # can get the values.
    print(sorted(folder_need_check))


if __name__ == '__main__':
    main()
import os
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import Any, Dict, List
import matplotlib.pyplot as plt
import pytz
import requests
import seaborn
from requests_toolbelt import MultipartEncoder
@dataclass
class Contributor:
    """
    Dataclass for a github contributor.

    Args:
        name (str): name of the contributor
        num_commits_this_week (int): number of commits made within one week
    """
    # GitHub login of the contributor
    name: str
    # number of commits this contributor made in the one-week window
    num_commits_this_week: int
def plot_bar_chart(x: List[Any], y: List[Any], xlabel: str, ylabel: str, title: str, output_path: str) -> None:
    """
    This function is a utility to plot the bar charts.

    Args:
        x (List[Any]): categories plotted on the x-axis
        y (List[Any]): values plotted on the y-axis
        xlabel (str): label for the x-axis
        ylabel (str): label for the y-axis
        title (str): chart title
        output_path (str): file path the rendered figure is saved to
    """
    # start from a clean figure so consecutive calls don't overlay each other
    plt.clf()
    seaborn.color_palette()
    fig = seaborn.barplot(x=x, y=y)
    fig.set(xlabel=xlabel, ylabel=ylabel, title=title)
    # strip the top/right spines for a cleaner chart
    seaborn.despine()
    plt.tight_layout()
    # high dpi so the chart stays readable when embedded in chat/reports
    plt.savefig(output_path, dpi=1200)
def get_issue_pull_request_comments(github_token: str, since: str) -> Dict[str, int]:
    """
    Retrieve the issue/PR comments made by our members in the last 7 days.

    Args:
        github_token (str): GitHub access token for API calls
        since (str): the path parameter required by GitHub Restful APIs, in the format of YYYY-MM-DDTHH:MM:SSZ

    Returns:
        Dict[str, int]: mapping from member login to the number of qualifying
        comments — i.e. comments authored by a MEMBER on an issue/PR that was
        NOT itself created by a MEMBER.
    """
    # prepare header
    headers = {
        'Authorization': f'Bearer {github_token}',
        'Accept': 'application/vnd.github+json',
        'X-GitHub-Api-Version': '2022-11-28'
    }

    user_engagement_count = {}

    # do pagination to the API; GitHub returns an empty list past the last page
    page = 1
    while True:
        comment_api = f'https://api.github.com/repos/hpcaitech/ColossalAI/issues/comments?since={since}&page={page}'
        comment_response = requests.get(comment_api, headers=headers).json()

        if len(comment_response) == 0:
            # an empty page marks the end of the paginated results
            break
        else:
            for item in comment_response:
                comment_author_relationship = item['author_association']
                if comment_author_relationship != 'MEMBER':
                    # if the comment is not made by our member
                    # we don't count this comment towards user engagement
                    continue

                # the comments endpoint covers both issues and PRs; the issue
                # id is the last path segment of the comment's issue_url
                issue_id = item['issue_url'].split('/')[-1]
                issue_api = f'https://api.github.com/repos/hpcaitech/ColossalAI/issues/{issue_id}'
                issue_response = requests.get(issue_api, headers=headers).json()
                issue_author_relationship = issue_response['author_association']

                if issue_author_relationship != 'MEMBER':
                    # this means that the issue/PR is not created by our own people
                    # any comments in this issue/PR by our member will be counted towards the leaderboard
                    member_name = item['user']['login']

                    if member_name in user_engagement_count:
                        user_engagement_count[member_name] += 1
                    else:
                        user_engagement_count[member_name] = 1
        page += 1
    return user_engagement_count
def get_discussion_comments(github_token: str, since: datetime) -> Dict[str, int]:
    """
    Retrieve the discussion comments made by our members in the last 7 days.
    This is only available via the GitHub GraphQL API.

    Args:
        github_token (str): GitHub access token for API calls
        since (datetime): the query parameter to determine whether the comment is made this week

    Returns:
        Dict[str, int]: mapping from member login to the number of qualifying
        discussion comments and replies
    """

    # use graphql to get the discussions updated in the last 7 days
    def _generate_discussion_query(num, cursor: str = None):
        # Builds a GraphQL query that pages FORWARD through the repository's
        # discussions, `num` at a time, resuming after `cursor` when given.
        if cursor is None:
            offset_str = ""
        else:
            offset_str = f", after: \"{cursor}\""
        query = f"""
        {{
            repository(owner: "hpcaitech", name: "ColossalAI"){{
                discussions(first: {num} {offset_str}){{
                    edges {{
                        cursor
                        node{{
                            title
                            author{{
                                login
                            }}
                            number
                            authorAssociation
                            updatedAt
                        }}
                    }}
                }}
            }}
        }}
        """
        return query

    def _generate_comment_reply_count_for_discussion(discussion_number, num, cursor: str = None):
        # here we assume that each comment will not have more than 100 replies for simplicity
        # otherwise, we have to go through pagination for both comment and reply
        # NOTE: this pages BACKWARD (`last` + `before`), so the most recent
        # comments of the discussion are visited first.
        if cursor is None:
            offset_str = ""
        else:
            offset_str = f", before: \"{cursor}\""
        query = f"""
        {{
            repository(owner: "hpcaitech", name: "ColossalAI"){{
                discussion(number: {discussion_number}){{
                    title
                    comments(last: {num} {offset_str}){{
                        edges{{
                            cursor
                            node {{
                                author{{
                                    login
                                }}
                                updatedAt
                                authorAssociation
                                replies (last: 100) {{
                                    edges {{
                                        node {{
                                            author {{
                                                login
                                            }}
                                            updatedAt
                                            authorAssociation
                                        }}
                                    }}
                                }}
                            }}
                        }}
                    }}
                }}
            }}
        }}
        """
        return query

    # a utility function to make call to Github GraphQL API
    def _call_graphql_api(query):
        headers = {"Authorization": f"Bearer {github_token}"}
        json_data = {'query': query}
        response = requests.post('https://api.github.com/graphql', json=json_data, headers=headers)
        data = response.json()
        return data

    # get the discussion numbers updated in the last 7 days
    discussion_numbers = []
    num_per_request = 10
    cursor = None

    while True:
        query = _generate_discussion_query(num_per_request, cursor)
        data = _call_graphql_api(query)
        found_discussion_out_of_time_range = False

        edges = data['data']['repository']['discussions']['edges']
        if len(edges) == 0:
            break
        else:
            # keep the discussion whose author is not a member
            for edge in edges:
                # print the discussion title
                discussion = edge['node']
                # GitHub returns timestamps as UTC strings; parsed naive here,
                # matching the naive `since` produced by datetime.utcnow()
                discussion_updated_at = datetime.strptime(discussion['updatedAt'], "%Y-%m-%dT%H:%M:%SZ")

                # check if the updatedAt is within the last 7 days
                # if yes, add it to dicussion_numbers
                if discussion_updated_at > since:
                    if discussion['authorAssociation'] != 'MEMBER':
                        discussion_numbers.append(discussion['number'])
                else:
                    # NOTE(review): assumes the discussions page is ordered so
                    # that once one stale discussion appears the rest of the
                    # pages are stale too — confirm against the API's ordering
                    found_discussion_out_of_time_range = True

        if found_discussion_out_of_time_range:
            break
        else:
            # update cursor
            cursor = edges[-1]['cursor']

    # get the dicussion comments and replies made by our member
    user_engagement_count = {}
    for dicussion_number in discussion_numbers:
        cursor = None
        num_per_request = 10

        while True:
            query = _generate_comment_reply_count_for_discussion(dicussion_number, num_per_request, cursor)
            data = _call_graphql_api(query)

            # get the comments
            edges = data['data']['repository']['discussion']['comments']['edges']

            # update the cursor
            if len(edges) == 0:
                break
            else:
                # update cursor for pagination
                cursor = edges[-1]['cursor']

                for edge in edges:
                    comment = edge['node']
                    if comment['authorAssociation'] == 'MEMBER':
                        # check if the updatedAt is within the last 7 days
                        # if yes, add it to user_engagement_count
                        comment_updated_at = datetime.strptime(comment['updatedAt'], "%Y-%m-%dT%H:%M:%SZ")
                        if comment_updated_at > since:
                            member_name = comment['author']['login']
                            if member_name in user_engagement_count:
                                user_engagement_count[member_name] += 1
                            else:
                                user_engagement_count[member_name] = 1

                    # get the replies
                    reply_edges = comment['replies']['edges']
                    if len(reply_edges) == 0:
                        continue
                    else:
                        for reply_edge in reply_edges:
                            reply = reply_edge['node']
                            if reply['authorAssociation'] == 'MEMBER':
                                # check if the updatedAt is within the last 7 days
                                # if yes, add it to dicussion_numbers
                                reply_updated_at = datetime.strptime(reply['updatedAt'], "%Y-%m-%dT%H:%M:%SZ")
                                if reply_updated_at > since:
                                    member_name = reply['author']['login']
                                    if member_name in user_engagement_count:
                                        user_engagement_count[member_name] += 1
                                    else:
                                        user_engagement_count[member_name] = 1
    return user_engagement_count
def generate_user_engagement_leaderboard_image(github_token: str, output_path: str) -> bool:
    """
    Generate the user engagement leaderboard image for stats within the last 7 days

    Args:
        github_token (str): GitHub access token for API calls
        output_path (str): the path to save the image

    Returns:
        bool: True if there was any engagement and an image was saved, False otherwise
    """
    # request to the Github API to get the users who have replied the most in the last 7 days
    now = datetime.utcnow()
    start_datetime = now - timedelta(days=7)
    start_datetime_str = start_datetime.strftime("%Y-%m-%dT%H:%M:%SZ")

    # get the issue/PR comments and discussion comment count
    issue_pr_engagement_count = get_issue_pull_request_comments(github_token=github_token, since=start_datetime_str)
    discussion_engagement_count = get_discussion_comments(github_token=github_token, since=start_datetime)

    # merge the two engagement counts per member
    total_engagement_count = {}
    total_engagement_count.update(issue_pr_engagement_count)
    for name, count in discussion_engagement_count.items():
        if name in total_engagement_count:
            total_engagement_count[name] += count
        else:
            total_engagement_count[name] = count

    # prepare the data for plotting
    x = []
    y = []

    if len(total_engagement_count) > 0:
        ranking = []
        for name, count in total_engagement_count.items():
            ranking.append((name, count))
        ranking.sort(key=lambda item: item[1], reverse=True)
        for name, count in ranking:
            x.append(count)
            y.append(name)

        # display the start of the 7-day window in Shanghai time on the image
        # (bug fix: the original showed the CURRENT time, so the "since" label
        # did not match the actual statistics window)
        start_datetime_str = pytz.utc.localize(start_datetime) \
            .astimezone(pytz.timezone('Asia/Shanghai')) \
            .strftime("%Y-%m-%dT%H:%M:%SZ")

        # plot the leaderboard
        xlabel = f"Number of Comments made (since {start_datetime_str})"
        ylabel = "Member"
        title = 'Active User Engagement Leaderboard'
        plot_bar_chart(x, y, xlabel=xlabel, ylabel=ylabel, title=title, output_path=output_path)
        return True
    else:
        # no engagement this week -> nothing to plot, no image written
        return False
def generate_contributor_leaderboard_image(github_token, output_path) -> bool:
    """
    Generate the contributor leaderboard image for stats within the last 7 days

    Args:
        github_token (str): GitHub access token for API calls
        output_path (str): the path to save the image

    Returns:
        bool: True if there were commits this week and an image was saved, False otherwise
    """
    import time

    # request to the Github API to get the users who have contributed in the last 7 days
    URL = 'https://api.github.com/repos/hpcaitech/ColossalAI/stats/contributors'
    headers = {
        'Authorization': f'Bearer {github_token}',
        'Accept': 'application/vnd.github+json',
        'X-GitHub-Api-Version': '2022-11-28'
    }

    # sometimes the Github API returns an empty response while it computes
    # the statistics in the background; retry with a short pause instead of
    # hammering the endpoint in a tight loop, and give up after a bounded
    # number of attempts (the original retried forever)
    response = []
    for attempt in range(10):
        response = requests.get(URL, headers=headers).json()
        if len(response) != 0:
            break
        time.sleep(3)

    if len(response) == 0:
        # stats never became available; skip image generation gracefully
        return False

    contributor_list = []

    # get number of commits for each contributor
    start_timestamp = None
    for item in response:
        num_commits_this_week = item['weeks'][-1]['c']
        name = item['author']['login']
        contributor = Contributor(name=name, num_commits_this_week=num_commits_this_week)
        contributor_list.append(contributor)

        # update start_timestamp
        start_timestamp = item['weeks'][-1]['w']

    # convert unix timestamp to Beijing datetime
    start_datetime = datetime.fromtimestamp(start_timestamp, tz=pytz.timezone('Asia/Shanghai'))
    start_datetime_str = start_datetime.strftime("%Y-%m-%dT%H:%M:%SZ")

    # sort by number of commits
    contributor_list.sort(key=lambda x: x.num_commits_this_week, reverse=True)

    # remove contributors who has zero commits
    contributor_list = [x for x in contributor_list if x.num_commits_this_week > 0]

    # prepare the data for plotting
    x = [x.num_commits_this_week for x in contributor_list]
    y = [x.name for x in contributor_list]

    # plot
    if len(x) > 0:
        xlabel = f"Number of Commits (since {start_datetime_str})"
        ylabel = "Contributor"
        title = 'Active Contributor Leaderboard'
        plot_bar_chart(x, y, xlabel=xlabel, ylabel=ylabel, title=title, output_path=output_path)
        return True
    else:
        # nobody committed this week -> no image written
        return False
def upload_image_to_lark(lark_tenant_token: str, image_path: str) -> str:
    """
    Upload image to Lark and return the image key

    Args:
        lark_tenant_token (str): Lark tenant access token
        image_path (str): the path to the image to be uploaded

    Returns:
        str: the image key assigned by Lark, used to reference the image in messages
    """
    url = "https://open.feishu.cn/open-apis/im/v1/images"
    # open the image in a context manager so the file handle is closed even if
    # the upload request fails (the original leaked the open file handle)
    with open(image_path, 'rb') as image_file:
        form = {'image_type': 'message', 'image': image_file}
        multi_form = MultipartEncoder(form)
        headers = {
            # tenant access token obtained via generate_lark_tenant_access_token
            'Authorization': f'Bearer {lark_tenant_token}',
        }
        headers['Content-Type'] = multi_form.content_type
        response = requests.request("POST", url, headers=headers, data=multi_form).json()
    return response['data']['image_key']
def generate_lark_tenant_access_token(app_id: str, app_secret: str) -> str:
    """
    Generate Lark tenant access token.

    Args:
        app_id (str): Lark app id
        app_secret (str): Lark app secret

    Returns:
        str: the tenant access token issued by Lark
    """
    endpoint = 'https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal'
    payload = {'app_id': app_id, 'app_secret': app_secret}
    # exchange the app credentials for a short-lived tenant token
    result = requests.post(endpoint, json=payload).json()
    return result['tenant_access_token']
def send_image_to_lark(image_key: str, webhook_url: str) -> None:
    """
    Send image to Lark.

    Args:
        image_key (str): the image key returned by Lark
        webhook_url (str): the webhook url to send the image
    """
    # build the image message payload expected by the Lark webhook
    payload = {"msg_type": "image", "content": {"image_key": image_key}}
    requests.post(webhook_url, json=payload)
def send_message_to_lark(message: str, webhook_url: str):
    """
    Send message to Lark.

    Args:
        message (str): the message to be sent
        webhook_url (str): the webhook url to send the message
    """
    # build the plain-text message payload expected by the Lark webhook
    payload = {"msg_type": "text", "content": {"text": message}}
    requests.post(webhook_url, json=payload)
if __name__ == '__main__':
    GITHUB_TOKEN = os.environ['GITHUB_TOKEN']
    CONTRIBUTOR_IMAGE_PATH = 'contributor_leaderboard.png'
    USER_ENGAGEMENT_IMAGE_PATH = 'engagement_leaderboard.png'

    # generate images; each call returns False when there was no activity this
    # week, in which case no image file is written to disk
    contrib_success = generate_contributor_leaderboard_image(GITHUB_TOKEN, CONTRIBUTOR_IMAGE_PATH)
    engagement_success = generate_user_engagement_leaderboard_image(GITHUB_TOKEN, USER_ENGAGEMENT_IMAGE_PATH)

    # prepare Lark credentials
    APP_ID = os.environ['LARK_APP_ID']
    APP_SECRET = os.environ['LARK_APP_SECRET']
    LARK_TENANT_TOKEN = generate_lark_tenant_access_token(app_id=APP_ID, app_secret=APP_SECRET)

    # upload images only when they were actually generated
    # (bug fix: the original uploaded unconditionally, which raised
    # FileNotFoundError whenever a leaderboard had no data this week)
    contributor_image_key = None
    user_engagement_image_key = None
    if contrib_success:
        contributor_image_key = upload_image_to_lark(LARK_TENANT_TOKEN, CONTRIBUTOR_IMAGE_PATH)
    if engagement_success:
        user_engagement_image_key = upload_image_to_lark(LARK_TENANT_TOKEN, USER_ENGAGEMENT_IMAGE_PATH)

    # send message to lark
    LARK_WEBHOOK_URL = os.environ['LARK_WEBHOOK_URL']
    message = """本周的社区榜单出炉啦!
1. 开发贡献者榜单
2. 用户互动榜单
注:
- 开发贡献者测评标准为:本周由公司成员提交的commit次数
- 用户互动榜单测评标准为:本周由公司成员在非成员创建的issue/PR/discussion中回复的次数
"""
    send_message_to_lark(message, LARK_WEBHOOK_URL)

    # send contributor image to lark
    if contrib_success:
        send_image_to_lark(contributor_image_key, LARK_WEBHOOK_URL)
    else:
        send_message_to_lark("本周没有成员贡献commit,无榜单图片生成。", LARK_WEBHOOK_URL)

    # send user engagement image to lark
    if engagement_success:
        send_image_to_lark(user_engagement_image_key, LARK_WEBHOOK_URL)
    else:
        send_message_to_lark("本周没有成员互动,无榜单图片生成。", LARK_WEBHOOK_URL)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment