# Python CircleCI 2.0 configuration file
#
# Check https://circleci.com/docs/2.0/language-python/ for more details
#
# Adopted from
# https://github.com/facebookresearch/detectron2/blob/master/.circleci/config.yml

version: 2

# -------------------------------------------------------------------------------------
# Environments to run the jobs in
# -------------------------------------------------------------------------------------
cpu: &cpu
  docker:
    - image: circleci/python:3.7
  resource_class: medium

gpu: &gpu
  environment:
    CUDA_VERSION: "10.1"
  machine:
    image: ubuntu-1604-cuda-10.1:201909-23
  resource_class: gpu.large

# -------------------------------------------------------------------------------------
# Re-usable commands
# -------------------------------------------------------------------------------------
setup_venv: &setup_venv
  - run:
      name: Setup Virtual Env
      working_directory: ~/
      command: |
        python -m venv ~/venv
        echo ". ~/venv/bin/activate" >> $BASH_ENV
        . ~/venv/bin/activate
        python --version
        which python
        which pip
        pip install --upgrade pip

install_dep_151: &install_dep_151
  - run:
      name: Install Dependencies
      command: |
        sudo apt-get install -y libopenmpi-dev
        pip install --progress-bar off torch==1.5.1+cu101 torchvision==0.6.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html
        pip install --progress-bar off -r requirements-test.txt
        python -c 'import torch; print("Torch version:", torch.__version__)'
        python -c 'import torch; assert torch.__version__.split(".")[:2] == ["1", "5"], "wrong torch version"'
        python -m torch.utils.collect_env

install_dep_160: &install_dep_160
  - run:
      name: Install Dependencies
      command: |
        sudo apt-get install -y libopenmpi-dev
        pip install --progress-bar off torch==1.6.0+cu101 torchvision==0.7.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html
        pip install --progress-bar off -r requirements-test.txt
        pip install --progress-bar off git+https://github.com/msbaines/torch_pg.git@c85c96f#egg=torch-pg
        python -c 'import torch; print("Torch version:", torch.__version__)'
        python -c 'import torch; assert torch.__version__.split(".")[:2] == ["1", "6"], "wrong torch version"'
        python -m torch.utils.collect_env

install_dep_171: &install_dep_171
  - run:
      name: Install Dependencies
      command: |
        sudo apt-get install -y libopenmpi-dev
        pip install --progress-bar off torch==1.7.1+cu101 torchvision==0.8.2+cu101 -f https://download.pytorch.org/whl/torch_stable.html
        pip install --progress-bar off -r requirements-test.txt
        pip install --progress-bar off git+https://github.com/msbaines/torch_pg.git@c85c96f#egg=torch-pg
        python -c 'import torch; print("Torch version:", torch.__version__)'
        python -c 'import torch; assert torch.__version__.split(".")[:2] == ["1", "7"], "wrong torch version"'
        python -m torch.utils.collect_env

install_repo_cpu: &install_repo_cpu
  - run:
      name: Install Repository
      command: |
        pip install .
        # Test import.
        python -c 'import sys; sys.path = sys.path[1:]; import fairscale'

install_repo_gpu: &install_repo_gpu
  - run:
      name: Install Repository
      command: |
        export CUDA_HOME=/usr/local/cuda-10.1
        pip install -e .
run_coverage: &run_coverage
  - run:
      name: Run Unit Tests With Coverage
      command: |
        pytest --cov-report=xml --cov=./ --timeout 60
        # Uploading test coverage for Python code
        bash <(curl -s https://codecov.io/bash) -f coverage.xml -cF Python

run_unittests: &run_unittests
  - run:
      name: Run Unit Tests
      command: |
        pytest --junitxml=test-results/junit.xml --verbose --timeout 60

run_mpi_unittests: &run_mpi_unittests
  - run:
      name: Run MPI Unit Tests
      command: |
        mpirun -n 4 python -m pytest -p torch_pg.pytest --only-mpi --junitxml=test-results/junit.xml --verbose tests/nn/moe

run_flake8: &run_flake8
  - run:
      name: Run Linter (flake8)
      command: |
        flake8 --show-source --statistics

run_pipe_benchmark: &run_pipe_benchmark
  - run:
      name: Run Pipe Benchmark
      command: |
        python benchmarks/pipe.py

run_oss_benchmark: &run_oss_benchmark
  - run:
      name: Run OSS Benchmark
      command: |
        python benchmarks/oss.py --world_size 4 --epochs 2
        python benchmarks/oss.py --check_regression --world_size 4 --optim_type oss_sharded_ddp --reference_speed 660 --reference_memory 930 --reference_loss 0.023

run_oss_gloo: &run_oss_gloo
  - run:
      name: Run OSS with Gloo
      command: |
        python benchmarks/oss.py --gloo --optim_type oss_ddp --epochs 2
        python benchmarks/oss.py --gloo --optim_type oss_sharded_ddp --epochs 2

run_oss_amp: &run_oss_amp
  - run:
      name: Run OSS with Torch AMP
      command: |
        python benchmarks/oss.py --amp --epochs 3 --optim_type oss_sharded_ddp

# -------------------------------------------------------------------------------------
# Jobs to run
# -------------------------------------------------------------------------------------
jobs:
  cpu_tests:
    <<: *cpu

    working_directory: ~/fairscale

    steps:
      - checkout
      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
            - cache-key-cpu-171-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_dep_171

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-cpu-171-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_repo_cpu

      - run:
          name: Run Linter (isort)
          command: |
            isort . --check

      - run:
          name: Run Linter (black)
          command: |
            black --check .

      - run:
          name: Run type-checking (mypy)
          command: |
            mypy --ignore-missing-imports --scripts-are-modules --pretty .
      - <<: *run_flake8

      - <<: *run_unittests

      - <<: *run_mpi_unittests

      - store_test_results:
          path: test-results

  gpu_tests_151:
    <<: *gpu

    working_directory: ~/fairscale

    steps:
      - checkout

      - run: nvidia-smi

      - run: pyenv global 3.7.0

      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
            - cache-key-gpu-151-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_dep_151

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-gpu-151-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_repo_gpu

      - <<: *run_unittests

      - store_test_results:
          path: test-results

  gpu_tests_160:
    <<: *gpu

    working_directory: ~/fairscale

    steps:
      - checkout

      - run: nvidia-smi

      - run: pyenv global 3.7.0

      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
            - cache-key-gpu-160-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_dep_160

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-gpu-160-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_repo_gpu

      - <<: *run_unittests

      - <<: *run_coverage

      - store_test_results:
          path: test-results

  gpu_tests_171:
    <<: *gpu

    working_directory: ~/fairscale

    steps:
      - checkout

      - run: nvidia-smi

      - run: pyenv global 3.7.0

      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
            - cache-key-gpu-171-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_dep_171

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-gpu-171-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_repo_gpu

      - <<: *run_unittests

      - store_test_results:
          path: test-results

  benchmarks:
    <<: *gpu

    working_directory: ~/fairscale

    steps:
      - checkout

      - run: nvidia-smi

      - run: pyenv uninstall -f 3.7.0

      - run: pyenv install 3.7.0

      - run: pyenv global 3.7.0

      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
            - cache-key-benchmarks-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_dep_171

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-benchmarks-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_repo_gpu

      - <<: *run_pipe_benchmark

      - <<: *run_oss_benchmark

      - <<: *run_oss_gloo

      - <<: *run_oss_amp

workflows:
  version: 2
  build:
    jobs:
      - cpu_tests
      - gpu_tests_151
      - gpu_tests_160
      - gpu_tests_171
      - benchmarks