# Python CircleCI 2.0 configuration file
#
# Check https://circleci.com/docs/2.0/language-python/ for more details
#
# Adopted from
# https://github.com/facebookresearch/detectron2/blob/master/.circleci/config.yml

version: 2

# -------------------------------------------------------------------------------------
# Environments to run the jobs in
# -------------------------------------------------------------------------------------
# Executor settings for CPU-only jobs: the circleci/python:3.7 Docker image
# on a "medium" resource class. Merged into a job with `<<: *cpu`.
cpu: &cpu
  resource_class: medium
  docker:
    - image: circleci/python:3.7

# Executor settings for GPU jobs: the ubuntu-1604-cuda-10.1:201909-23 machine
# image on a "gpu.large" resource class, with CUDA_VERSION exported to steps.
# Merged into a job with `<<: *gpu`.
gpu: &gpu
  resource_class: gpu.large
  machine:
    image: ubuntu-1604-cuda-10.1:201909-23
  environment:
    # Quoted so the value stays the string "10.1" rather than a float.
    CUDA_VERSION: "10.1"

# -------------------------------------------------------------------------------------
# Re-usable commands
# -------------------------------------------------------------------------------------
# Step that creates a virtualenv at ~/venv, appends its activation line to
# $BASH_ENV, activates it for the current step, prints the interpreter/pip
# locations and upgrades pip.
# Defined as a one-element list so a job can splice it in with
# `- <<: *setup_venv`.
setup_venv: &setup_venv
  - run:
      working_directory: ~/
      name: Setup Virtual Env
      command: |
        python -m venv ~/venv
        echo ". ~/venv/bin/activate" >> $BASH_ENV
        . ~/venv/bin/activate
        python --version
        which python
        which pip
        pip install --upgrade pip

# Step that installs the dependencies for the torch 1.5.1 test matrix entry:
# libopenmpi-dev via apt, torch 1.5.1 / torchvision 0.6.1 (+cu101 wheels),
# and requirements-test.txt. It then prints the torch version, fails the step
# if the installed major.minor is not 1.5, and dumps the torch environment.
install_dep_151: &install_dep_151
  - run:
      name: Install Dependencies
      command: |
        sudo apt-get install -y libopenmpi-dev
        pip install --progress-bar off torch==1.5.1+cu101 torchvision==0.6.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html
        pip install --progress-bar off -r requirements-test.txt
        python -c 'import torch; print("Torch version:", torch.__version__)'
        python -c 'import torch; assert torch.__version__.split(".")[:2] == ["1", "5"], "wrong torch version"'
        python -m torch.utils.collect_env

# Step that installs the dependencies for the torch 1.6.0 test matrix entry:
# libopenmpi-dev via apt, torch 1.6.0 / torchvision 0.7.0 (+cu101 wheels),
# requirements-test.txt, and torch_pg pinned to commit c85c96f. It then prints
# the torch version, fails the step if the installed major.minor is not 1.6,
# and dumps the torch environment.
install_dep_160: &install_dep_160
  - run:
      name: Install Dependencies
      command: |
        sudo apt-get install -y libopenmpi-dev
        pip install --progress-bar off torch==1.6.0+cu101 torchvision==0.7.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html
        pip install --progress-bar off -r requirements-test.txt
        pip install --progress-bar off git+https://github.com/msbaines/torch_pg.git@c85c96f#egg=torch-pg
        python -c 'import torch; print("Torch version:", torch.__version__)'
        python -c 'import torch; assert torch.__version__.split(".")[:2] == ["1", "6"], "wrong torch version"'
        python -m torch.utils.collect_env

# Step that installs the dependencies for the torch 1.7.1 test matrix entry:
# libopenmpi-dev via apt, torch 1.7.1 / torchvision 0.8.2 (+cu101 wheels),
# requirements-test.txt, and torch_pg pinned to commit c85c96f. It then prints
# the torch version, fails the step if the installed major.minor is not 1.7,
# and dumps the torch environment.
install_dep_171: &install_dep_171
  - run:
      name: Install Dependencies
      command: |
        sudo apt-get install -y libopenmpi-dev
        pip install --progress-bar off torch==1.7.1+cu101 torchvision==0.8.2+cu101 -f https://download.pytorch.org/whl/torch_stable.html
        pip install --progress-bar off -r requirements-test.txt
        pip install --progress-bar off git+https://github.com/msbaines/torch_pg.git@c85c96f#egg=torch-pg
        python -c 'import torch; print("Torch version:", torch.__version__)'
        python -c 'import torch; assert torch.__version__.split(".")[:2] == ["1", "7"], "wrong torch version"'
        python -m torch.utils.collect_env

# Step that installs the repository with a regular (non-editable)
# `pip install .` and then imports `fairscale` with the first sys.path entry
# dropped, so the import is not satisfied by the checkout directory itself.
install_repo_cpu: &install_repo_cpu
  - run:
      name: Install Repository
      command: |
        pip install .
        # Test import.
        python -c 'import sys; sys.path = sys.path[1:]; import fairscale'

# Step that installs the repository in editable mode (`pip install -e .`)
# with CUDA_HOME pointing at /usr/local/cuda-10.1.
install_repo_gpu: &install_repo_gpu
  - run:
      name: Install Repository
      command: |
        export CUDA_HOME=/usr/local/cuda-10.1
        pip install -e .

# Step that runs pytest with coverage collection (XML report, 60 s per-test
# timeout) and uploads coverage.xml to Codecov under the "Python" flag.
# NOTE(review): the upload pipes a script fetched from codecov.io straight
# into bash; the bash uploader is believed to be deprecated upstream —
# confirm and consider migrating to the currently supported uploader.
run_coverage: &run_coverage
  - run:
      name: Run Unit Tests With Coverage
      command: |
        pytest --cov-report=xml --cov=./ --timeout 60
        #Uploading test coverage for Python code
        bash <(curl -s https://codecov.io/bash) -f coverage.xml -cF Python

# Step that runs the pytest suite verbosely with a 60 s per-test timeout and
# writes a JUnit report to test-results/junit.xml.
run_unittests: &run_unittests
  - run:
      name: Run Unit Tests
      command: |
        pytest --junitxml=test-results/junit.xml --verbose --timeout 60

# Step that runs the tests under tests/nn/moe across 4 MPI ranks, using the
# torch_pg pytest plugin with --only-mpi.
# The JUnit report goes to its own file: the "Run Unit Tests" step already
# writes test-results/junit.xml, and reusing that name here would overwrite
# it. Jobs upload the whole test-results directory, so both are collected.
run_mpi_unittests: &run_mpi_unittests
  - run:
      name: Run MPI Unit Tests
      command: |
        mpirun -n 4 python -m pytest -p torch_pg.pytest --only-mpi --junitxml=test-results/junit-mpi.xml --verbose tests/nn/moe

# Step that runs flake8, printing the offending source for each finding and
# a summary count per error code.
run_flake8: &run_flake8
  - run:
      name: Run Linter (flake8)
      command: |
        flake8 --show-source --statistics

# Step that runs the pipe benchmark script with its default arguments.
run_pipe_benchmark: &run_pipe_benchmark
  - run:
      name: Run Pipe Benchmark
      command: |
        python benchmarks/pipe.py

# Step that runs the OSS benchmark twice: first a plain 2-epoch run on a
# world size of 4, then a --check_regression run for oss_sharded_ddp against
# the reference speed, memory and loss values passed on the command line.
run_oss_benchmark: &run_oss_benchmark
  - run:
      name: Run OSS Benchmark
      command: |
        python benchmarks/oss.py --world_size 4 --epochs 2
        python benchmarks/oss.py --check_regression --world_size 4 --optim_type oss_sharded_ddp --reference_speed 660 --reference_memory 930 --reference_loss 0.023

# Step that runs the OSS benchmark with the --gloo flag for 2 epochs, once
# with optim_type oss_ddp and once with oss_sharded_ddp.
run_oss_gloo: &run_oss_gloo
  - run:
      name: Run OSS with Gloo
      command: |
        python benchmarks/oss.py --gloo --optim_type oss_ddp --epochs 2
        python benchmarks/oss.py --gloo --optim_type oss_sharded_ddp --epochs 2

# Step that runs the OSS benchmark with the --amp flag for 3 epochs using
# optim_type oss_sharded_ddp.
# The list is indented under its key like every other command block in this
# file; the parsed value is the same as the flush form.
run_oss_amp: &run_oss_amp
  - run:
      name: Run OSS with Torch AMP
      command: |
        python benchmarks/oss.py --amp --epochs 3 --optim_type oss_sharded_ddp

# -------------------------------------------------------------------------------------
# Jobs to run
# -------------------------------------------------------------------------------------

# Job definitions. Every job checks out the repo into ~/fairscale, builds a
# virtualenv, restores/saves a ~/venv cache keyed on the checksums of setup.py
# and requirements-test.txt, installs dependencies and the repository, and
# then runs its own checks. Re-usable steps are spliced in with
# `- <<: *anchor`, which merges the single `run` mapping held by the anchor.
jobs:
  # Lint, type-check and test on the CPU executor with the torch 1.7.1
  # dependency set; also runs the MPI tests.
  cpu_tests:
    <<: *cpu

    working_directory: ~/fairscale

    steps:
      - checkout
      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
            - cache-key-cpu-171-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_dep_171

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-cpu-171-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_repo_cpu

      - run:
          name: Run Linter (isort)
          command: |
            isort . --check

      - run:
          name: Run Linter (black)
          command: |
            black --check .

      - run:
          name: Run type-checking (mypy)
          command: |
            mypy --ignore-missing-imports --scripts-are-modules --pretty .

      - <<: *run_flake8

      - <<: *run_unittests

      - <<: *run_mpi_unittests

      - store_test_results:
          path: test-results

  # Unit tests on the GPU executor with the torch 1.5.1 dependency set.
  gpu_tests_151:
    <<: *gpu

    working_directory: ~/fairscale

    steps:
      - checkout

      - run: nvidia-smi

      - run: pyenv global 3.7.0

      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
            - cache-key-gpu-151-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_dep_151

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-gpu-151-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_repo_gpu

      - <<: *run_unittests

      - store_test_results:
          path: test-results

  # Unit tests on the GPU executor with the torch 1.6.0 dependency set.
  # This is the only job that also runs the coverage step.
  gpu_tests_160:
    <<: *gpu

    working_directory: ~/fairscale

    steps:
      - checkout

      - run: nvidia-smi

      - run: pyenv global 3.7.0

      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
            - cache-key-gpu-160-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_dep_160

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-gpu-160-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_repo_gpu

      - <<: *run_unittests

      # NOTE(review): this invokes pytest a second time (now with coverage),
      # so the suite runs twice in this job — confirm that is intended.
      - <<: *run_coverage

      - store_test_results:
          path: test-results

  # Unit tests on the GPU executor with the torch 1.7.1 dependency set.
  gpu_tests_171:
    <<: *gpu

    working_directory: ~/fairscale

    steps:
      - checkout

      - run: nvidia-smi

      - run: pyenv global 3.7.0

      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
            - cache-key-gpu-171-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_dep_171

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-gpu-171-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_repo_gpu

      - <<: *run_unittests

      - store_test_results:
          path: test-results

  # Pipe and OSS benchmarks on the GPU executor with the torch 1.7.1
  # dependency set. Python 3.7.0 is force-uninstalled and reinstalled through
  # pyenv before being selected.
  benchmarks:
    <<: *gpu

    working_directory: ~/fairscale

    steps:
      - checkout

      - run: nvidia-smi

      - run: pyenv uninstall -f 3.7.0

      - run: pyenv install 3.7.0

      - run: pyenv global 3.7.0

      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
            - cache-key-benchmarks-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_dep_171

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-benchmarks-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_repo_gpu

      - <<: *run_pipe_benchmark

      - <<: *run_oss_benchmark

      - <<: *run_oss_gloo

      - <<: *run_oss_amp

# Single "build" workflow listing every job defined above; no ordering
# constraints (`requires`) are declared between them.
workflows:
  version: 2
  build:
    jobs:
      - cpu_tests
      - gpu_tests_151
      - gpu_tests_160
      - gpu_tests_171
      - benchmarks