Commit 9e768b59 authored by zhuwenwen's avatar zhuwenwen
Browse files
parents 7bc5a8e3 8aed02b9
1.12.0-11.3.0
1.11.0-11.3.0
1.10.1-11.3.0
1.13.0-11.6.0
2.0.0-11.7.0
[run]
concurrency = multiprocessing
parallel = true
sigterm = true
[flake8]
ignore =
;W503 line break before binary operator
W503,
;E203 whitespace before ':'
E203,
; exclude file
exclude =
.tox,
.git,
__pycache__,
build,
dist,
*.pyc,
*.egg-info,
.cache,
.eggs
max-line-length = 120
per-file-ignores = __init__.py:F401
blank_issues_enabled: true
contact_links:
- name: ❓ Simple question - Slack Chat
url: https://join.slack.com/t/colossalaiworkspace/shared_invite/zt-z7b26eeb-CBp7jouvu~r0~lcFzX832w
url: https://github.com/hpcaitech/public_assets/tree/main/colossalai/contact/slack
about: This issue tracker is not for technical support. Please use our Slack chat, and ask the community for help.
- name: ❓ Simple question - WeChat
url: https://github.com/hpcaitech/ColossalAI/blob/main/docs/images/WeChat.png
......
......@@ -14,7 +14,7 @@
- [Compatibility Test on Dispatch](#compatibility-test-on-dispatch)
- [Release](#release)
- [User Friendliness](#user-friendliness)
- [Commmunity](#commmunity)
- [Community](#community)
- [Configuration](#configuration)
- [Progress Log](#progress-log)
......@@ -43,10 +43,18 @@ I will provide the details of each workflow below.
| Workflow Name | File name | Description |
| ---------------------- | -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------- |
| `Build on PR` | `build_on_pr.yml` | This workflow is triggered when the label `Run build and Test` is assigned to a PR. It will run all the unit tests in the repository with 4 GPUs. |
| `Build on PR` | `build_on_pr.yml` | This workflow is triggered when a PR changes essential files and a branch is created/deleted. It will run all the unit tests in the repository with 4 GPUs. |
| `Build on Schedule` | `build_on_schedule.yml` | This workflow will run the unit tests every day with 8 GPUs. The result is sent to Lark. |
| `Report test coverage` | `report_test_coverage.yml` | This workflow will put up a comment on the PR to report the test coverage results when `Build` is done. |
To reduce the average time of the unit test on PR, `Build on PR` workflow manages testmon cache.
1. When creating a new branch, it copies `cache/main/.testmondata*` to `cache/<branch>/`.
2. When creating a new PR or changing the base branch of a PR, it copies `cache/<base_ref>/.testmondata*` to `cache/_pull/<pr_number>/`.
3. When running unit tests for each PR, it restores testmon cache from `cache/_pull/<pr_number>/`. After the test, it stores the cache back to `cache/_pull/<pr_number>/`.
4. When a PR is closed, if it's merged, it copies `cache/_pull/<pr_number>/.testmondata*` to `cache/<base_ref>/`. Otherwise, it just removes `cache/_pull/<pr_number>`.
5. When a branch is deleted, it removes `cache/<ref>`.
### Example Test
| Workflow Name | File name | Description |
......@@ -97,7 +105,7 @@ This workflow is triggered by manually dispatching the workflow. It has the foll
| `Synchronize submodule` | `submodule.yml` | This workflow will check if any git submodule is updated. If so, it will create a PR to update the submodule pointers. |
| `Close inactive issues` | `close_inactive.yml` | This workflow will close issues which are stale for 14 days. |
### Commmunity
### Community
| Workflow Name | File name | Description |
| -------------------------------------------- | -------------------------------- | -------------------------------------------------------------------------------- |
......
......@@ -2,22 +2,93 @@ name: Build on PR
on:
pull_request:
types: [synchronize, labeled]
types: [synchronize, opened, reopened, ready_for_review, closed, edited]
branches:
- "main"
- "develop"
- "feature/**"
paths:
- ".github/workflows/build_on_pr.yml" # run command & env variables change
- "colossalai/**" # source code change
- "!colossalai/**.md" # ignore doc change
- "op_builder/**" # cuda extension change
- "!op_builder/**.md" # ignore doc change
- "requirements/**" # requirements change
- "tests/**" # test change
- "!tests/**.md" # ignore doc change
- "pytest.ini" # test config change
- "setup.py" # install command change
create:
delete:
jobs:
# Seeds a testmon cache directory for a newly created branch by copying the
# default branch's cache directory. Runs only for branch-creation events in
# the hpcaitech/ColossalAI repository.
prepare_cache:
  name: Prepare testmon cache
  if: |
    github.event_name == 'create' &&
    github.event.ref_type == 'branch' &&
    github.event.repository.full_name == 'hpcaitech/ColossalAI'
  runs-on: [self-hosted, gpu]
  container:
    image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
    options: --rm
  timeout-minutes: 5
  defaults:
    run:
      shell: bash
  steps:
    - name: Copy testmon cache
      # The branch name is passed through `env` instead of being interpolated
      # into the script, so a crafted branch name cannot inject shell commands.
      # A branch name may contain slashes; every slash is replaced with a
      # space so the name maps to a single cache directory.
      run: |
        REF_BRANCH="${REF//\// }"
        if [ -d "/github/home/testmon_cache/${MAIN_BRANCH}" ]; then
          cp -p -r "/github/home/testmon_cache/${MAIN_BRANCH}" "/github/home/testmon_cache/${REF_BRANCH}"
        fi
      env:
        MAIN_BRANCH: ${{ github.event.master_branch }}
        REF: ${{ github.event.ref }}
# Seeds the per-PR testmon cache from the base branch's cache when a PR is
# opened, reopened, or has its base branch changed. Runs only for PRs whose
# base repository is hpcaitech/ColossalAI.
prepare_cache_for_pr:
  name: Prepare testmon cache for PR
  if: |
    github.event_name == 'pull_request' &&
    (github.event.action == 'opened' || github.event.action == 'reopened' || (github.event.action == 'edited' && github.event.changes.base != null)) &&
    github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
  runs-on: [self-hosted, gpu]
  container:
    image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
    options: --rm
  timeout-minutes: 5
  defaults:
    run:
      shell: bash
  concurrency:
    group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-prepare-cache
    cancel-in-progress: true
  steps:
    - name: Copy testmon cache
      # The base ref is passed through `env` instead of being interpolated
      # into the script, so a crafted branch name cannot inject shell commands.
      # A branch name may contain slashes; every slash is replaced with a
      # space so the name maps to a single cache directory.
      run: |
        BASE="${BASE_REF//\// }"
        SRC="/github/home/testmon_cache/${BASE}"
        DST="/github/home/testmon_cache/_pull/${PR_NUMBER}"
        # Copy only when the base branch already has a non-empty cache.
        if [ -d "${SRC}" ] && [ -n "$(ls -A "${SRC}")" ]; then
          mkdir -p "${DST}" && cp -p -r "${SRC}"/.testmondata* "${DST}"
        fi
      env:
        PR_NUMBER: ${{ github.event.number }}
        BASE_REF: ${{ github.event.pull_request.base.ref }}
detect:
name: Detect file change
if: |
github.event_name == 'pull_request' &&
(github.event.action == 'synchronize' || github.event.action == 'opened' || github.event.action == 'reopened' || github.event.action == 'ready_for_review') &&
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' &&
contains( github.event.pull_request.labels.*.name, 'Run Build and Test')
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
outputs:
changedExtenisonFiles: ${{ steps.find-extension-change.outputs.all_changed_files }}
anyExtensionFileChanged: ${{ steps.find-extension-change.outputs.any_changed }}
changedLibraryFiles: ${{ steps.find-lib-change.outputs.all_changed_files }}
anyLibraryFileChanged: ${{ steps.find-lib-change.outputs.any_changed }}
runs-on: ubuntu-latest
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-detect-change
cancel-in-progress: true
steps:
- uses: actions/checkout@v2
with:
......@@ -66,14 +137,18 @@ jobs:
build:
name: Build and Test Colossal-AI
needs: detect
if: needs.detect.outputs.anyLibraryFileChanged == 'true'
runs-on: [self-hosted, gpu]
container:
image: hpcaitech/pytorch-cuda:1.11.0-11.3.0
options: --gpus all --rm -v /data/scratch/cifar-10:/data/scratch/cifar-10
timeout-minutes: 40
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
options: --gpus all --rm -v /data/scratch/cifar-10:/data/scratch/cifar-10 -v /data/scratch/llama-tiny:/data/scratch/llama-tiny
timeout-minutes: 60
defaults:
run:
shell: bash
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-run-test
cancel-in-progress: true
steps:
- name: Checkout TensorNVMe
uses: actions/checkout@v2
......@@ -84,7 +159,9 @@ jobs:
- name: Restore TensorNVMe Cache
run: |
[ ! -z "$(ls -A /github/home/tensornvme_cache/)" ] && cp -p -r /github/home/tensornvme_cache/* /__w/ColossalAI/ColossalAI/TensorNVMe
if [ -d /github/home/tensornvme_cache ] && [ ! -z "$(ls -A /github/home/tensornvme_cache/)" ]; then
cp -p -r /github/home/tensornvme_cache/* /__w/ColossalAI/ColossalAI/TensorNVMe
fi
- name: Install TensorNVMe
run: |
......@@ -107,10 +184,11 @@ jobs:
if: needs.detect.outputs.anyExtensionFileChanged != 'true'
run: |
# -p flag is required to preserve the file timestamp to avoid ninja rebuild
[ ! -z "$(ls -A /github/home/cuda_ext_cache/)" ] && cp -p -r /github/home/cuda_ext_cache/* /__w/ColossalAI/ColossalAI/
if [ -d /github/home/cuda_ext_cache ] && [ ! -z "$(ls -A /github/home/cuda_ext_cache/)" ]; then
cp -p -r /github/home/cuda_ext_cache/* /__w/ColossalAI/ColossalAI/
fi
- name: Install Colossal-AI
if: needs.detect.outputs.anyLibraryFileChanged == 'true'
run: |
CUDA_EXT=1 pip install -v -e .
pip install -r requirements/requirements-test.txt
......@@ -120,14 +198,30 @@ jobs:
# -p flag is required to preserve the file timestamp to avoid ninja rebuild
cp -p -r /__w/ColossalAI/ColossalAI/build /github/home/cuda_ext_cache/
- name: Restore Testmon Cache
run: |
if [ -d /github/home/testmon_cache/_pull/${PR_NUMBER} ] && [ ! -z "$(ls -A /github/home/testmon_cache/_pull/${PR_NUMBER})" ]; then
cp -p -r /github/home/testmon_cache/_pull/${PR_NUMBER}/.testmondata* /__w/ColossalAI/ColossalAI/
fi
env:
PR_NUMBER: ${{ github.event.number }}
- name: Execute Unit Testing
if: needs.detect.outputs.anyLibraryFileChanged == 'true'
run: |
CURL_CA_BUNDLE="" PYTHONPATH=$PWD pytest --cov=. --cov-report xml tests/
CURL_CA_BUNDLE="" PYTHONPATH=$PWD pytest -m "not largedist" --testmon --testmon-forceselect --testmon-cov=. --durations=10 tests/
env:
DATA: /data/scratch/cifar-10
NCCL_SHM_DISABLE: 1
LD_LIBRARY_PATH: /github/home/.tensornvme/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
TESTMON_CORE_PKGS: /__w/ColossalAI/ColossalAI/requirements/requirements.txt,/__w/ColossalAI/ColossalAI/requirements/requirements-test.txt
LLAMA_PATH: /data/scratch/llama-tiny
- name: Store Testmon Cache
run: |
mkdir -p /github/home/testmon_cache/_pull/${PR_NUMBER}
cp -p -r /__w/ColossalAI/ColossalAI/.testmondata* /github/home/testmon_cache/_pull/${PR_NUMBER}/
env:
PR_NUMBER: ${{ github.event.number }}
- name: Collate artifact
env:
......@@ -140,7 +234,7 @@ jobs:
echo $PR_NUMBER > ./report/pr_number
# generate coverage.xml if any
if [ "$anyLibraryFileChanged" == "true" ]; then
if [ "$anyLibraryFileChanged" == "true" ] && [ -e .coverage ]; then
allFiles=""
for file in $changedLibraryFiles; do
if [ "$allFiles" == "" ]; then
......@@ -165,3 +259,54 @@ jobs:
with:
name: report
path: report/
# Runs when a PR against hpcaitech/ColossalAI is closed: if the PR was merged,
# its testmon cache is copied into the base branch's cache; the per-PR cache
# directory is then removed.
store_cache:
  name: Store testmon cache for PR
  if: |
    github.event_name == 'pull_request' &&
    github.event.action == 'closed' &&
    github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
  runs-on: [self-hosted, gpu]
  container:
    image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
    options: --rm
  timeout-minutes: 5
  defaults:
    run:
      shell: bash
  steps:
    - name: Store testmon cache if possible
      if: github.event.pull_request.merged == true
      # The base ref is passed through `env` instead of being interpolated
      # into the script, so a crafted branch name cannot inject shell commands.
      # A branch name may contain slashes; every slash is replaced with a
      # space so the name maps to a single cache directory.
      run: |
        BASE="${BASE_REF//\// }"
        SRC="/github/home/testmon_cache/_pull/${PR_NUMBER}"
        DST="/github/home/testmon_cache/${BASE}"
        if [ -d "${SRC}" ] && [ -n "$(ls -A "${SRC}")" ]; then
          # The base branch may not have a cache directory yet.
          mkdir -p "${DST}"
          cp -p -r "${SRC}"/.testmondata* "${DST}/"
        fi
      env:
        PR_NUMBER: ${{ github.event.pull_request.number }}
        BASE_REF: ${{ github.event.pull_request.base.ref }}
    - name: Remove testmon cache
      # `:?` aborts the step if PR_NUMBER is empty, so the whole `_pull`
      # directory can never be removed by accident.
      run: |
        rm -rf "/github/home/testmon_cache/_pull/${PR_NUMBER:?}"
      env:
        PR_NUMBER: ${{ github.event.pull_request.number }}
# Removes a branch's testmon cache directory when the branch is deleted in
# the hpcaitech/ColossalAI repository.
remove_cache:
  name: Remove testmon cache
  if: |
    github.event_name == 'delete' &&
    github.event.ref_type == 'branch' &&
    github.event.repository.full_name == 'hpcaitech/ColossalAI'
  runs-on: [self-hosted, gpu]
  container:
    image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
    options: --rm
  timeout-minutes: 5
  defaults:
    run:
      shell: bash
  steps:
    - name: Remove testmon cache
      # The branch name is passed through `env` instead of being interpolated
      # into the script, so a crafted branch name cannot inject shell commands.
      # A branch name may contain slashes; every slash is replaced with a
      # space so the name maps to a single cache directory.
      # `:?` aborts the step if the name is empty, so the cache root itself
      # can never be removed by accident.
      run: |
        BASE="${REF//\// }"
        rm -rf "/github/home/testmon_cache/${BASE:?}"
      env:
        REF: ${{ github.event.ref }}
......@@ -3,7 +3,7 @@ name: Build on Schedule
on:
schedule:
# run at 00:00 UTC every day
- cron: '0 0 * * *'
- cron: "0 0 * * *"
workflow_dispatch:
jobs:
......@@ -12,8 +12,8 @@ jobs:
if: github.repository == 'hpcaitech/ColossalAI'
runs-on: [self-hosted, 8-gpu]
container:
image: hpcaitech/pytorch-cuda:1.11.0-11.3.0
options: --gpus all --rm -v /data/scratch/cifar-10:/data/scratch/cifar-10
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
options: --gpus all --rm -v /data/scratch/cifar-10:/data/scratch/cifar-10 -v /data/scratch/llama-tiny:/data/scratch/llama-tiny
timeout-minutes: 40
steps:
- name: Check GPU Availability # ensure all GPUs have enough memory
......@@ -60,10 +60,11 @@ jobs:
- name: Unit Testing
if: steps.check-avai.outputs.avai == 'true'
run: |
PYTHONPATH=$PWD pytest tests
PYTHONPATH=$PWD pytest --durations=0 tests
env:
DATA: /data/scratch/cifar-10
LD_LIBRARY_PATH: /github/home/.tensornvme/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
LLAMA_PATH: /data/scratch/llama-tiny
- name: Notify Lark
id: message-preparation
......
......@@ -44,13 +44,13 @@ jobs:
name: Test for PyTorch Compatibility
needs: matrix_preparation
if: github.repository == 'hpcaitech/ColossalAI'
runs-on: [self-hosted, gpu]
runs-on: [self-hosted, 8-gpu]
strategy:
fail-fast: false
matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}}
container:
image: ${{ matrix.container }}
options: --gpus all --rm -v /data/scratch/cifar-10:/data/scratch/cifar-10
options: --gpus all --rm -v /data/scratch/cifar-10:/data/scratch/cifar-10 -v /data/scratch/llama-tiny:/data/scratch/llama-tiny
timeout-minutes: 120
steps:
- name: Install dependencies
......@@ -64,16 +64,26 @@ jobs:
- name: Install tensornvme
run: |
cd TensorNVMe
conda install cmake
apt update && apt install -y cmake
pip install -r requirements.txt
pip install -v .
- uses: actions/checkout@v2
with:
ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}
- name: Download cub for CUDA 10.2
run: |
CUDA_VERSION=$(nvcc -V | awk -F ',| ' '/release/{print $6}')
# check if it is CUDA 10.2
# download cub
if [ "$CUDA_VERSION" = "10.2" ]; then
wget https://github.com/NVIDIA/cub/archive/refs/tags/1.8.0.zip
unzip 1.8.0.zip
cp -r cub-1.8.0/cub/ colossalai/kernel/cuda_native/csrc/kernels/include/
fi
- name: Install Colossal-AI
run: |
pip install -r requirements/requirements.txt
pip install -v --no-cache-dir .
CUDA_EXT=1 pip install -v .
pip install -r requirements/requirements-test.txt
- name: Unit Testing
run: |
......@@ -82,3 +92,4 @@ jobs:
DATA: /data/scratch/cifar-10
NCCL_SHM_DISABLE: 1
LD_LIBRARY_PATH: /github/home/.tensornvme/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
LLAMA_PATH: /data/scratch/llama-tiny
......@@ -3,8 +3,8 @@ name: Compatibility Test on PR
on:
pull_request:
paths:
- 'version.txt'
- '.compatibility'
- "version.txt"
- ".compatibility"
jobs:
matrix_preparation:
......@@ -12,6 +12,9 @@ jobs:
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-prepare-matrix
cancel-in-progress: true
steps:
- uses: actions/checkout@v3
- id: set-matrix
......@@ -32,14 +35,17 @@ jobs:
name: Test for PyTorch Compatibility
needs: matrix_preparation
if: github.repository == 'hpcaitech/ColossalAI'
runs-on: [self-hosted, gpu]
runs-on: [self-hosted, 8-gpu]
strategy:
fail-fast: false
matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}}
container:
image: ${{ matrix.container }}
options: --gpus all --rm -v /data/scratch/cifar-10:/data/scratch/cifar-10
options: --gpus all --rm -v /data/scratch/cifar-10:/data/scratch/cifar-10 -v /data/scratch/llama-tiny:/data/scratch/llama-tiny
timeout-minutes: 120
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-run-test-${{ matrix.container }}
cancel-in-progress: true
steps:
- name: Install dependencies
run: |
......@@ -52,15 +58,27 @@ jobs:
- name: Install tensornvme
run: |
cd TensorNVMe
conda install cmake
apt update && apt install -y cmake
pip install -r requirements.txt
pip install -v .
- uses: actions/checkout@v2
with:
ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}
- name: Download cub for CUDA 10.2
run: |
CUDA_VERSION=$(nvcc -V | awk -F ',| ' '/release/{print $6}')
# check if it is CUDA 10.2
# download cub
if [ "$CUDA_VERSION" = "10.2" ]; then
wget https://github.com/NVIDIA/cub/archive/refs/tags/1.8.0.zip
unzip 1.8.0.zip
cp -r cub-1.8.0/cub/ colossalai/kernel/cuda_native/csrc/kernels/include/
fi
- name: Install Colossal-AI
run: |
pip install -v --no-cache-dir .
CUDA_EXT=1 pip install -v .
pip install -r requirements/requirements-test.txt
- name: Unit Testing
run: |
......@@ -69,3 +87,4 @@ jobs:
DATA: /data/scratch/cifar-10
NCCL_SHM_DISABLE: 1
LD_LIBRARY_PATH: /github/home/.tensornvme/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
LLAMA_PATH: /data/scratch/llama-tiny
......@@ -32,13 +32,13 @@ jobs:
name: Test for PyTorch Compatibility
needs: matrix_preparation
if: github.repository == 'hpcaitech/ColossalAI'
runs-on: [self-hosted, gpu]
runs-on: [self-hosted, 8-gpu]
strategy:
fail-fast: false
matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}}
container:
image: ${{ matrix.container }}
options: --gpus all --rm -v /data/scratch/cifar-10:/data/scratch/cifar-10
options: --gpus all --rm -v /data/scratch/cifar-10:/data/scratch/cifar-10 -v /data/scratch/llama-tiny:/data/scratch/llama-tiny
timeout-minutes: 120
steps:
- name: Install dependencies
......@@ -54,16 +54,28 @@ jobs:
- name: Install tensornvme
run: |
cd TensorNVMe
conda install cmake
apt update && apt install -y cmake
pip install -r requirements.txt
pip install -v .
- uses: actions/checkout@v2
with:
ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}
- name: Download cub for CUDA 10.2
run: |
CUDA_VERSION=$(nvcc -V | awk -F ',| ' '/release/{print $6}')
# check if it is CUDA 10.2
# download cub
if [ "$CUDA_VERSION" = "10.2" ]; then
wget https://github.com/NVIDIA/cub/archive/refs/tags/1.8.0.zip
unzip 1.8.0.zip
cp -r cub-1.8.0/cub/ colossalai/kernel/cuda_native/csrc/kernels/include/
fi
- name: Install Colossal-AI
run: |
pip install -v --no-cache-dir .
CUDA_EXT=1 pip install -v .
pip install -r requirements/requirements-test.txt
- name: Unit Testing
......@@ -73,6 +85,7 @@ jobs:
DATA: /data/scratch/cifar-10
NCCL_SHM_DISABLE: 1
LD_LIBRARY_PATH: /github/home/.tensornvme/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
LLAMA_PATH: /data/scratch/llama-tiny
- name: Notify Lark
id: message-preparation
......
......@@ -37,6 +37,18 @@ jobs:
- name: Install PyTorch
run: eval ${{ matrix.build.torch_command }}
- name: Download cub for CUDA 10.2
run: |
CUDA_VERSION=$(nvcc -V | awk -F ',| ' '/release/{print $6}')
# check if it is CUDA 10.2
# download cub
if [ "$CUDA_VERSION" = "10.2" ]; then
wget https://github.com/NVIDIA/cub/archive/refs/tags/1.8.0.zip
unzip 1.8.0.zip
cp -r cub-1.8.0/cub/ colossalai/kernel/cuda_native/csrc/kernels/include/
fi
- name: Build
run: |
CUDA_EXT=1 pip install -v .
name: Build Documentation After Merge
name: Build Documentation On Schedule & After Release
on:
workflow_dispatch:
pull_request:
paths:
- 'version.txt'
- 'docs/**'
types:
- closed
schedule:
- cron: "0 12 * * *" # build doc every day at 8pm Singapore time (12pm UTC time)
release:
types: [published]
jobs:
build-doc:
name: Trigger Documentation Build Workflow
if: ( github.event_name == 'workflow_dispatch' || github.event.pull_request.merged == true ) && github.repository == 'hpcaitech/ColossalAI'
if: github.repository == 'hpcaitech/ColossalAI'
runs-on: ubuntu-latest
steps:
- name: trigger workflow in ColossalAI-Documentation
......
......@@ -2,23 +2,29 @@ name: Check Documentation on PR
on:
pull_request:
branches:
- "main"
- "develop"
- "feature/**"
paths:
- 'docs/**'
- "docs/**"
jobs:
check-i18n:
name: Check docs in diff languages
if: |
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
runs-on: ubuntu-latest
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-check-i18n
cancel-in-progress: true
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: '3.8.14'
python-version: "3.8.14"
- run: python .github/workflows/scripts/check_doc_i18n.py -d docs/source
......@@ -26,33 +32,38 @@ jobs:
name: Test if the docs can be built
if: |
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
runs-on: ubuntu-latest
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-check-doc
cancel-in-progress: true
steps:
- uses: actions/checkout@v2
with:
path: './ColossalAI'
path: "./ColossalAI"
fetch-depth: 0
- uses: actions/checkout@v2
with:
path: './ColossalAI-Documentation'
repository: 'hpcaitech/ColossalAI-Documentation'
path: "./ColossalAI-Documentation"
repository: "hpcaitech/ColossalAI-Documentation"
- uses: actions/setup-python@v2
with:
python-version: '3.8.14'
python-version: "3.8.14"
# we use the versions in the main branch as the guide for versions to display
# checkout will give your merged branch
# therefore, we need to make the merged branch as the main branch
# there is no main branch, so it's safe to checkout the main branch from the merged branch
# docer will rebase the remote main branch to the merged branch, so we have to config user
- name: Make the merged branch main
run: |
cd ColossalAI
curBranch=$(git rev-parse --abbrev-ref HEAD)
git checkout main
git merge $curBranch # fast-forward master up to the merge
git checkout -b main
git branch -u origin/main
git config user.name 'github-actions'
git config user.email 'github-actions@github.com'
- name: Build docs
run: |
......
name: Test Documentation on PR
on:
pull_request:
branches:
- "main"
- "develop"
- "feature/**"
# any change to a markdown file under docs/source will trigger the test for the changed docs.
paths:
- 'docs/source/**.md'
- "docs/source/**.md"
jobs:
# Detect the changed doc files and expose them as job outputs for the test job.
detect-changed-doc:
if: |
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' && github.event_name == 'pull_request'
runs-on: ubuntu-latest
outputs:
any_changed: ${{ steps.changed-files.outputs.any_changed }}
changed_files: ${{ steps.changed-files.outputs.all_changed_files }}
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-detect-change
cancel-in-progress: true
name: Detect changed example files
steps:
- uses: actions/checkout@v3
......@@ -44,7 +50,6 @@ jobs:
# Add this condition to avoid executing this job if the trigger event is workflow_dispatch.
if: |
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' && github.event_name == 'pull_request' &&
needs.detect-changed-doc.outputs.any_changed == 'true'
name: Test the changed Doc
......@@ -57,12 +62,15 @@ jobs:
defaults:
run:
shell: bash
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-run-doctest
cancel-in-progress: true
steps:
- name: Checkout ColossalAI-Documentation
uses: actions/checkout@v2
with:
path: './ColossalAI-Documentation'
repository: 'hpcaitech/ColossalAI-Documentation'
path: "./ColossalAI-Documentation"
repository: "hpcaitech/ColossalAI-Documentation"
- name: Install Docer
run: |
......@@ -81,12 +89,12 @@ jobs:
- name: Install ColossalAI
run: |
source activate pytorch
pip install -v .
CUDA_EXT=1 pip install -v .
- name: Test the Doc
run: |
source activate pytorch
for file in ${{ steps.changed-files.outputs.all_changed_files }}; do
for file in ${{ needs.detect-changed-doc.outputs.changed_files }}; do
echo "Testing $file now..."
docer test -p $file
done
......
......@@ -32,7 +32,7 @@ jobs:
- name: Install ColossalAI
run: |
pip install -v .
CUDA_EXT=1 pip install -v .
- name: Install Doc Test Requirements
run: |
......
......@@ -53,7 +53,7 @@ jobs:
uses: actions/checkout@v3
- name: Install Colossal-AI
run: |
pip install -v .
CUDA_EXT=1 pip install -v .
- name: Test the example
run: |
dir=${{ matrix.directory }}
......
name: Test Example on PR
on:
pull_request:
branches:
- "main"
- "develop"
- "feature/**"
# any change in the examples folder will trigger check for the corresponding example.
paths:
- 'examples/**'
- "examples/**"
jobs:
# Detect the changed example files and output a matrix containing all the corresponding directory names.
detect-changed-example:
if: |
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' && github.event_name == 'pull_request'
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.setup-matrix.outputs.matrix }}
anyChanged: ${{ steps.setup-matrix.outputs.anyChanged }}
name: Detect changed example files
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-detect-change
cancel-in-progress: true
steps:
- uses: actions/checkout@v3
with:
......@@ -62,7 +68,6 @@ jobs:
# Add this condition to avoid executing this job if the trigger event is workflow_dispatch.
if: |
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' && github.event_name == 'pull_request' &&
needs.detect-changed-example.outputs.anyChanged == 'true'
name: Test the changed example
......@@ -75,12 +80,15 @@ jobs:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
options: --gpus all --rm -v /data/scratch/examples-data:/data/
timeout-minutes: 10
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-run-example-${{ matrix.directory }}
cancel-in-progress: true
steps:
- uses: actions/checkout@v3
- name: Install Colossal-AI
run: |
pip install -v .
CUDA_EXT=1 pip install -v .
- name: Test the example
run: |
......
......@@ -42,7 +42,7 @@ jobs:
- name: Install Colossal-AI
run: |
pip install -v .
CUDA_EXT=1 pip install -v .
- name: Traverse all files
run: |
......
name: Publish Docker Image to DockerHub after Merge
name: Publish Docker Image to DockerHub after Publish
on:
workflow_dispatch:
pull_request:
paths:
- 'version.txt'
types:
- closed
release:
types: [published]
jobs:
release:
name: Publish Docker Image to DockerHub
if: ( github.event_name == 'workflow_dispatch' || github.event.pull_request.merged == true ) && github.repository == 'hpcaitech/ColossalAI'
if: github.repository == 'hpcaitech/ColossalAI'
runs-on: [self-hosted, gpu]
container:
image: "hpcaitech/docker-in-docker:latest"
......@@ -26,8 +23,11 @@ jobs:
run: |
version=$(cat version.txt)
tag=hpcaitech/colossalai:$version
docker build --build-arg http_proxy=http://172.17.0.1:7890 --build-arg https_proxy=http://172.17.0.1:7890 -t $tag ./docker
latest=hpcaitech/colossalai:latest
docker build --build-arg http_proxy=http://172.17.0.1:7890 --build-arg https_proxy=http://172.17.0.1:7890 --build-arg VERSION=v${version} -t $tag ./docker
docker tag $tag $latest
echo "tag=${tag}" >> $GITHUB_OUTPUT
echo "latest=${latest}" >> $GITHUB_OUTPUT
- name: Log in to Docker Hub
uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9
......@@ -39,6 +39,7 @@ jobs:
id: docker-push
run: |
docker push ${{ steps.build.outputs.tag }}
docker push ${{ steps.build.outputs.latest }}
notify:
name: Notify Lark via webhook
......@@ -50,7 +51,7 @@ jobs:
- uses: actions/setup-python@v2
with:
python-version: '3.8.14'
python-version: "3.8.14"
- name: Install requests
run: pip install requests
......
......@@ -9,8 +9,9 @@ on:
jobs:
report-test-coverage:
runs-on: ubuntu-latest
if: ${{ github.event.workflow_run.conclusion == 'success' }}
steps:
- name: 'Download artifact'
- name: "Download artifact"
uses: actions/github-script@v6
with:
script: |
......@@ -31,7 +32,7 @@ jobs:
let fs = require('fs');
fs.writeFileSync(`${process.env.GITHUB_WORKSPACE}/report.zip`, Buffer.from(download.data));
- name: 'Unzip artifact'
- name: "Unzip artifact"
id: unzip
run: |
unzip report.zip
......@@ -58,7 +59,7 @@ jobs:
echo "</details>" >> coverage_report.txt
mv coverage_report.txt coverage.txt
- name: 'Comment on PR'
- name: "Comment on PR"
if: steps.unzip.outputs.hasReport == 'true'
uses: actions/github-script@v6
with:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment