Commit e532679c authored by oahzxl's avatar oahzxl
Browse files

Merge branch 'main' of https://github.com/oahzxl/ColossalAI into chunk

parents c1492e50 7d5640b9
......@@ -20,6 +20,8 @@ body:
A clear and concise description of what you expected to happen.
**Screenshots**
If applicable, add screenshots to help explain your problem.
**Optional: Affiliation**
Institution/email information helps better analyze and evaluate users to improve the project. Welcome to establish in-depth cooperation.
placeholder: |
A clear and concise description of what the bug is.
validations:
......
......@@ -17,6 +17,7 @@ body:
**Expectation** What is your expected content about it?
**Screenshots** If applicable, add screenshots to help explain your problem.
**Suggestions** Tell us how we could improve the documentation.
**Optional: Affiliation** Institution/email information helps better analyze and evaluate users to improve the project. Welcome to establish in-depth cooperation.
placeholder: |
A clear and concise description of the issue.
validations:
......
......@@ -22,6 +22,8 @@ body:
If applicable, add screenshots to help explain your problem.
**Suggest a potential alternative/fix**
Tell us how we could improve this project.
**Optional: Affiliation**
Institution/email information helps better analyze and evaluate users to improve the project. Welcome to establish in-depth cooperation.
placeholder: |
A clear and concise description of your idea.
validations:
......
......@@ -13,6 +13,7 @@ body:
- Bumping a critical dependency's major version;
- A significant improvement in user-friendliness;
- Significant refactor;
- Optional: Affiliation/email information helps better analyze and evaluate users to improve the project. Welcome to establish in-depth cooperation.
- ...
Please note this is not for feature request or bug template; such action could make us identify the issue wrongly and close it without doing anything.
......@@ -43,4 +44,4 @@ body:
- type: markdown
attributes:
value: >
Thanks for contributing 🎉!
\ No newline at end of file
Thanks for contributing 🎉!
addReviewers: true
addAssignees: author
numberOfReviewers: 1
reviewers:
- frankleeeee
- kurisusnowdeng
name: Assign Reviewers for Team
on:
pull_request:
types: [opened]
jobs:
assign_reviewer:
name: Assign Reviewer for PR
runs-on: ubuntu-latest
if: |
github.event.pull_request.draft == false && github.base_ref == 'main'
&& github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
&& toJson(github.event.pull_request.requested_reviewers) == '[]'
steps:
- uses: kentaro-m/auto-assign-action@v1.2.1
with:
configuration-path: '.github/reviewer_list.yml'
name: Test Example
on:
pull_request:
# any change in the examples folder will trigger check for the corresponding example.
paths:
- 'examples/**'
# run at 00:00 of every Sunday(singapore time) so here is UTC time Saturday 16:00
schedule:
- cron: '0 16 * * 6'
jobs:
# This is for changed example files detect and output a matrix containing all the corresponding directory name.
detect-changed-example:
if: |
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' && github.event_name == 'pull_request'
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.setup-matrix.outputs.matrix }}
anyChanged: ${{ steps.setup-matrix.outputs.anyChanged }}
name: Detect changed example files
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
ref: ${{ github.event.pull_request.head.sha }}
- name: Get all changed example files
id: changed-files
uses: tj-actions/changed-files@v35
# Using this can trigger action each time a PR is submitted.
with:
since_last_remote_commit: true
- name: setup matrix
id: setup-matrix
run: |
changedFileName=""
for file in ${{ steps.changed-files.outputs.all_changed_files }}; do
changedFileName="${file}:${changedFileName}"
done
echo "$changedFileName was changed"
res=`python .github/workflows/scripts/example_checks/detect_changed_example.py --fileNameList $changedFileName`
echo "All changed examples are $res"
if [ "$x" = "[]" ]; then
echo "anyChanged=false" >> $GITHUB_OUTPUT
echo "matrix=null" >> $GITHUB_OUTPUT
else
dirs=$( IFS=',' ; echo "${res[*]}" )
echo "anyChanged=true" >> $GITHUB_OUTPUT
echo "matrix={\"directory\":$(echo "$dirs")}" >> $GITHUB_OUTPUT
fi
# If no file is changed, it will prompt an error and shows the matrix do not have value.
check-changed-example:
# Add this condition to avoid executing this job if the trigger event is workflow_dispatch.
if: |
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' && github.event_name == 'pull_request'
name: Test the changed example
needs: detect-changed-example
runs-on: [self-hosted, gpu]
strategy:
matrix: ${{fromJson(needs.detect-changed-example.outputs.matrix)}}
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
options: --gpus all --rm -v /data/scratch/examples-data:/data/
timeout-minutes: 10
steps:
- uses: actions/checkout@v3
- name: Install Colossal-AI
run: |
pip install -v .
- name: Test the example
run: |
example_dir=${{ matrix.directory }}
cd "${PWD}/examples/${example_dir}"
bash test_ci.sh
env:
NCCL_SHM_DISABLE: 1
# This is for all files' weekly check. Specifically, this job is to find all the directories.
matrix_preparation:
if: |
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' && github.event_name == 'schedule'
name: Prepare matrix for weekly check
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.setup-matrix.outputs.matrix }}
steps:
- name: 📚 Checkout
uses: actions/checkout@v3
- name: setup matrix
id: setup-matrix
run: |
res=`python .github/workflows/scripts/example_checks/check_example_weekly.py`
all_loc=$( IFS=',' ; echo "${res[*]}" )
echo "Found the examples: $all_loc"
echo "matrix={\"directory\":$(echo "$all_loc")}" >> $GITHUB_OUTPUT
weekly_check:
if: |
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' && github.event_name == 'schedule'
name: Weekly check all examples
needs: matrix_preparation
runs-on: [self-hosted, gpu]
strategy:
matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}}
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
timeout-minutes: 10
steps:
- name: 📚 Checkout
uses: actions/checkout@v3
- name: Install Colossal-AI
run: |
pip install -v .
- name: Traverse all files
run: |
example_dir=${{ matrix.diretory }}
echo "Testing ${example_dir} now"
cd "${PWD}/examples/${example_dir}"
bash test_ci.sh
env:
NCCL_SHM_DISABLE: 1
name: Build
on:
on:
pull_request:
types: [synchronize, labeled]
jobs:
build:
name: Build and Test Colossal-AI
detect:
name: Detect kernel-related file change
if: |
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' &&
contains( github.event.pull_request.labels.*.name, 'Run Build and Test')
outputs:
changedFiles: ${{ steps.find-changed-files.outputs.changedFiles }}
anyChanged: ${{ steps.find-changed-files.outputs.any_changed }}
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0
ref: ${{ github.event.pull_request.head.sha }}
- name: Find the changed files
id: find-changed-files
uses: tj-actions/changed-files@v35
with:
since_last_remote_commit: true
files: |
op_builder/**
colossalai/kernel/**
setup.py
- name: List changed files
run: |
for file in ${{ steps.find-changed-files.outputs.all_changed_files }}; do
echo "$file was changed"
done
build:
name: Build and Test Colossal-AI
needs: detect
runs-on: [self-hosted, gpu]
container:
image: hpcaitech/pytorch-cuda:1.11.0-11.3.0
......@@ -23,27 +51,38 @@ jobs:
repository: hpcaitech/TensorNVMe
ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}
path: TensorNVMe
- name: Install tensornvme
run: |
cd TensorNVMe
conda install cmake
pip install -r requirements.txt
pip install -v .
- uses: actions/checkout@v2
with:
ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}
- name: Install Colossal-AI
- name: Restore cache
if: needs.detect.outputs.anyChanged != 'true'
run: |
[ ! -z "$(ls -A /github/home/cuda_ext_cache/)" ] && cp -r /github/home/cuda_ext_cache/* /__w/ColossalAI/ColossalAI/
pip install -r requirements/requirements.txt
pip install -v -e .
cp -r /__w/ColossalAI/ColossalAI/build /github/home/cuda_ext_cache/
cp /__w/ColossalAI/ColossalAI/*.so /github/home/cuda_ext_cache/
# -p flag is required to preserve the file timestamp to avoid ninja rebuild
[ ! -z "$(ls -A /github/home/cuda_ext_cache/)" ] && cp -p -r /github/home/cuda_ext_cache/* /__w/ColossalAI/ColossalAI/
- name: Install Colossal-AI
run: |
CUDA_EXT=1 pip install -v -e .
pip install -r requirements/requirements-test.txt
- name: Unit Testing
run: |
PYTHONPATH=$PWD pytest tests
PYTHONPATH=$PWD pytest --cov=. --cov-report lcov tests
env:
DATA: /data/scratch/cifar-10
NCCL_SHM_DISABLE: 1
LD_LIBRARY_PATH: /github/home/.tensornvme/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
- name: Store Cache
run: |
# -p flag is required to preserve the file timestamp to avoid ninja rebuild
cp -p -r /__w/ColossalAI/ColossalAI/build /github/home/cuda_ext_cache/
......@@ -2,7 +2,7 @@ name: Build on 8 GPUs
on:
schedule:
# run at 00:00 of every Sunday
# run at 00:00 of every Sunday
- cron: '0 0 * * *'
workflow_dispatch:
......@@ -30,13 +30,11 @@ jobs:
- uses: actions/checkout@v2
with:
ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}
- name: Install Colossal-AI
- name: Install Colossal-AI
run: |
[ ! -z "$(ls -A /github/home/cuda_ext_cache/)" ] && cp -r /github/home/cuda_ext_cache/* /__w/ColossalAI/ColossalAI/
pip install -r requirements/requirements.txt
pip install -v -e .
CUDA_EXT=1 pip install -v -e .
cp -r /__w/ColossalAI/ColossalAI/build /github/home/cuda_ext_cache/
cp /__w/ColossalAI/ColossalAI/*.so /github/home/cuda_ext_cache/
pip install -r requirements/requirements-test.txt
- name: Unit Testing
run: |
......@@ -45,4 +43,3 @@ jobs:
env:
DATA: /data/scratch/cifar-10
LD_LIBRARY_PATH: /github/home/.tensornvme/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
\ No newline at end of file
......@@ -70,7 +70,7 @@ jobs:
- uses: actions/checkout@v2
with:
ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}
- name: Install Colossal-AI
- name: Install Colossal-AI
run: |
pip install -r requirements/requirements.txt
pip install -v --no-cache-dir .
......
name: Manual Test Example
on:
workflow_dispatch:
inputs:
example_directory:
type: string
description: example directory, separated by space. For example, language/gpt, images/vit. Simply input language or simply gpt does not work.
required: true
jobs:
matrix_preparation:
if: |
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
name: Check the examples user want
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- name: 📚 Checkout
uses: actions/checkout@v3
- name: Set up matrix
id: set-matrix
env:
check_dir: ${{ inputs.example_directory }}
run: |
res=`python .github/workflows/scripts/example_checks/check_dispatch_inputs.py --fileNameList $check_dir`
if [ res == "failure" ];then
exit -1
fi
dirs="[${check_dir}]"
echo "Testing examples in $dirs"
echo "matrix={\"directory\":$(echo "$dirs")}" >> $GITHUB_OUTPUT
test_example:
if: |
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
name: Manually check example files
needs: manual_check_matrix_preparation
runs-on: [self-hosted, gpu]
strategy:
matrix: ${{fromJson(needs.manual_check_matrix_preparation.outputs.matrix)}}
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
options: --gpus all --rm -v /data/scratch/examples-data:/data/
timeout-minutes: 10
steps:
- name: 📚 Checkout
uses: actions/checkout@v3
- name: Install Colossal-AI
run: |
pip install -v .
- name: Test the example
run: |
dir=${{ matrix.directory }}
echo "Testing ${dir} now"
cd "${PWD}/examples/${dir}"
bash test_ci.sh
env:
NCCL_SHM_DISABLE: 1
......@@ -20,7 +20,7 @@ jobs:
fetch-depth: 0
- uses: actions/setup-python@v2
with:
python-version: '3.7.12'
python-version: '3.8.14'
- name: generate draft
id: generate_draft
run: |
......@@ -42,4 +42,3 @@ jobs:
body_path: ${{ steps.generate_draft.outputs.path }}
draft: True
prerelease: false
\ No newline at end of file
......@@ -64,9 +64,21 @@ jobs:
- name: Copy scripts and checkout
run: |
cp -r ./.github/workflows/scripts/* ./
# link the cache diretories to current path
ln -s /github/home/conda_pkgs ./conda_pkgs
ln -s /github/home/pip_wheels ./pip_wheels
# set the conda package path
echo "pkgs_dirs:\n - $PWD/conda_pkgs" > ~/.condarc
# set safe directory
git config --global --add safe.directory /__w/ColossalAI/ColossalAI
# check out
git checkout $git_ref
# get cub package for cuda 10.2
wget https://github.com/NVIDIA/cub/archive/refs/tags/1.8.0.zip
unzip 1.8.0.zip
env:
......
......@@ -18,23 +18,17 @@ jobs:
with:
fetch-depth: 0
- name: Build Docker
id: build
run: |
version=$(cat version.txt)
docker build --build-arg http_proxy=http://172.17.0.1:7890 --build-arg https_proxy=http://172.17.0.1:7890 -t hpcaitech/colossalai:$version ./docker
tag=hpcaitech/colossalai:$version
docker build --build-arg http_proxy=http://172.17.0.1:7890 --build-arg https_proxy=http://172.17.0.1:7890 -t $tag ./docker
echo "tag=${tag}" >> $GITHUB_OUTPUT
- name: Log in to Docker Hub
uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38
with:
images: hpcaitech/colossalai
- name: Build and push Docker image
uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc
with:
context: .
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
\ No newline at end of file
- name: Push Docker image
run: |
docker push ${{ steps.build.outputs.tag }}
name: Release bdist wheel for Nightly versions
name: Publish Nightly Version to PyPI
on:
schedule:
# run at 00:00 of every Sunday
- cron: '0 0 * * 6'
workflow_dispatch:
schedule:
- cron: '0 0 * * 6' # release on every Sunday 00:00 UTC time
jobs:
matrix_preparation:
name: Prepare Container List
build-n-publish:
if: github.event_name == 'workflow_dispatch' || github.repository == 'hpcaitech/ColossalAI'
name: Build and publish Python 🐍 distributions 📦 to PyPI
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
timeout-minutes: 20
steps:
- id: set-matrix
run: |
matrix="[\"hpcaitech/cuda-conda:11.3\", \"hpcaitech/cuda-conda:10.2\"]"
echo $matrix
echo "::set-output name=matrix::{\"container\":$(echo $matrix)}"
- uses: actions/checkout@v2
build:
name: Release bdist wheels
needs: matrix_preparation
if: github.repository == 'hpcaitech/ColossalAI' && contains(fromJson('["FrankLeeeee", "ver217", "feifeibear", "kurisusnowdeng"]'), github.actor)
runs-on: [self-hosted, gpu]
strategy:
fail-fast: false
matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}}
container:
image: ${{ matrix.container }}
options: --gpus all --rm
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0
# cub is for cuda 10.2
- name: Copy scripts and checkout
run: |
cp -r ./.github/workflows/scripts/* ./
ln -s /github/home/pip_wheels ./pip_wheels
wget https://github.com/NVIDIA/cub/archive/refs/tags/1.8.0.zip
unzip 1.8.0.zip
- name: Build bdist wheel
run: |
pip install beautifulsoup4 requests packaging
python ./build_colossalai_wheel.py --nightly
- name: 🚀 Deploy
uses: garygrossgarten/github-action-scp@release
with:
local: all_dist
remote: ${{ secrets.PRIVATE_PYPI_NIGHTLY_DIR }}
host: ${{ secrets.PRIVATE_PYPI_HOST }}
username: ${{ secrets.PRIVATE_PYPI_USER }}
password: ${{ secrets.PRIVATE_PYPI_PASSWD }}
remove_old_build:
name: Remove old nightly build
runs-on: ubuntu-latest
needs: build
steps:
- name: executing remote ssh commands using password
uses: appleboy/ssh-action@master
env:
BUILD_DIR: ${{ secrets.PRIVATE_PYPI_NIGHTLY_DIR }}
with:
host: ${{ secrets.PRIVATE_PYPI_HOST }}
username: ${{ secrets.PRIVATE_PYPI_USER }}
password: ${{ secrets.PRIVATE_PYPI_PASSWD }}
envs: BUILD_DIR
script: |
cd $BUILD_DIR
find . -type f -mtime +0 -exec rm -f {} +
script_stop: true
- uses: actions/setup-python@v2
with:
python-version: '3.8.14'
- run: NIGHTLY=1 python setup.py sdist build
# publish to PyPI if executed on the main branch
- name: Publish package to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
user: __token__
password: ${{ secrets.PYPI_API_TOKEN }}
verbose: true
name: Publish to PyPI
on: workflow_dispatch
on:
workflow_dispatch:
pull_request:
paths:
- 'version.txt'
types:
- closed
jobs:
build-n-publish:
if: github.ref_name == 'main' && github.repository == 'hpcaitech/ColossalAI' && contains(fromJson('["FrankLeeeee", "ver217", "feifeibear", "kurisusnowdeng"]'), github.actor)
if: github.event_name == 'workflow_dispatch' || github.repository == 'hpcaitech/ColossalAI' && github.event.pull_request.merged == true && github.base_ref == 'main'
name: Build and publish Python 🐍 distributions 📦 to PyPI
runs-on: ubuntu-latest
timeout-minutes: 20
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: '3.7.12'
python-version: '3.8.14'
- run: python setup.py sdist build
# publish to PyPI if executed on the main branch
# publish to Test PyPI if executed on the develop branch
- name: Publish package to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
......
name: Publish to Test PyPI
on: workflow_dispatch
jobs:
build-n-publish:
if: github.repository == 'hpcaitech/ColossalAI' && contains(fromJson('["FrankLeeeee", "ver217", "feifeibear", "kurisusnowdeng"]'), github.actor)
name: Build and publish Python 🐍 distributions 📦 to Test PyPI
runs-on: ubuntu-latest
timeout-minutes: 20
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: '3.7.12'
- run: python setup.py sdist build
# publish to PyPI if executed on the main branch
# publish to Test PyPI if executed on the develop branch
- name: Publish package to Test PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
user: __token__
password: ${{ secrets.TEST_PYPI_API_TOKEN }}
repository_url: https://test.pypi.org/legacy/
verbose: true
from filecmp import cmp
import requests
from bs4 import BeautifulSoup
import argparse
import os
import subprocess
from packaging import version
from filecmp import cmp
from functools import cmp_to_key
import requests
from bs4 import BeautifulSoup
from packaging import version
WHEEL_TEXT_ROOT_URL = 'https://github.com/hpcaitech/public_assets/tree/main/colossalai/torch_build/torch_wheels'
RAW_TEXT_FILE_PREFIX = 'https://raw.githubusercontent.com/hpcaitech/public_assets/main/colossalai/torch_build/torch_wheels'
CUDA_HOME = os.environ['CUDA_HOME']
......
......@@ -18,7 +18,7 @@ if [ $1 == "pip" ]
then
wget -nc -q -O ./pip_wheels/$filename $url
pip install ./pip_wheels/$filename
elif [ $1 == 'conda' ]
then
conda install pytorch==$torch_version cudatoolkit=$cuda_version $flags
......@@ -34,8 +34,9 @@ fi
python setup.py bdist_wheel
mv ./dist/* ./all_dist
# must remove build to enable compilation for
# cuda extension in the next build
rm -rf ./build
python setup.py clean
conda deactivate
conda env remove -n $python_version
import argparse
import os
def check_inputs(input_list):
for path in input_list:
real_path = os.path.join('examples', path)
if not os.path.exists(real_path):
return False
return True
def main():
parser = argparse.ArgumentParser()
parser.add_argument('-f', '--fileNameList', type=str, help="List of file names")
args = parser.parse_args()
name_list = args.fileNameList.split(",")
is_correct = check_inputs(name_list)
if is_correct:
print('success')
else:
print('failure')
if __name__ == '__main__':
main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment