# Python CircleCI 2.0 configuration file
#
# Check https://circleci.com/docs/2.0/language-python/ for more details
#
# Adapted from
# https://github.com/facebookresearch/detectron2/blob/master/.circleci/config.yml

version: 2

# -------------------------------------------------------------------------------------
# Environments to run the jobs in
# -------------------------------------------------------------------------------------
# Executor settings shared through the `cpu` anchor: a Docker executor running
# the circleci/python:3.7 image on the medium resource class.
cpu: &cpu
  resource_class: medium
  docker:
    - image: circleci/python:3.7

# Executor settings shared through the `gpu` anchor: a machine executor using
# the CUDA 10.1 Ubuntu 16.04 image on the gpu.large resource class, with
# CUDA_VERSION set in the job environment.
gpu: &gpu
  resource_class: gpu.large
  machine:
    image: ubuntu-1604-cuda-10.1:201909-23
  environment:
    CUDA_VERSION: "10.1"

# -------------------------------------------------------------------------------------
# Re-usable commands
# -------------------------------------------------------------------------------------
# Creates a virtualenv at ~/venv, appends its activation line to $BASH_ENV,
# activates it for this step, prints which python/pip are in use, and
# upgrades pip.
setup_venv: &setup_venv
  - run:
      working_directory: ~/
      name: Setup Virtual Env
      command: |
        python -m venv ~/venv
        echo ". ~/venv/bin/activate" >> $BASH_ENV
        . ~/venv/bin/activate
        python --version
        which python
        which pip
        pip install --upgrade pip

# Installs libopenmpi-dev, the test requirements, and the CUDA 10.1 build of
# torch 1.5.1, then prints the torch version and environment report.
install_dep_15: &install_dep_15
  - run:
      name: Install Dependencies
      command: |
        sudo apt-get install -y libopenmpi-dev
        pip install --progress-bar off -r requirements-test.txt
        pip install --progress-bar off torch==1.5.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html
        python -c 'import torch; print("Torch version:", torch.__version__)'
        python -m torch.utils.collect_env

# Installs libopenmpi-dev, the test requirements, the CUDA 10.1 build of
# torch 1.6.0, and torch_pg pinned to commit c85c96f, then prints the torch
# version and environment report.
install_dep_16: &install_dep_16
  - run:
      name: Install Dependencies
      command: |
        sudo apt-get install -y libopenmpi-dev
        pip install --progress-bar off -r requirements-test.txt
        pip install --progress-bar off torch==1.6.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html
        pip install --progress-bar off git+https://github.com/msbaines/torch_pg.git@c85c96f#egg=torch-pg
        python -c 'import torch; print("Torch version:", torch.__version__)'
        python -m torch.utils.collect_env

# Installs libopenmpi-dev, the test requirements, the CUDA 10.1 build of
# torch 1.7.0, and torch_pg pinned to commit c85c96f, then prints the torch
# version and environment report.
install_dep_17: &install_dep_17
  - run:
      name: Install Dependencies
      command: |
        sudo apt-get install -y libopenmpi-dev
        pip install --progress-bar off -r requirements-test.txt
        pip install --progress-bar off torch==1.7.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html
        pip install --progress-bar off git+https://github.com/msbaines/torch_pg.git@c85c96f#egg=torch-pg
        python -c 'import torch; print("Torch version:", torch.__version__)'
        python -m torch.utils.collect_env

# Installs libopenmpi-dev, the test requirements, and torch_pg pinned to
# commit c85c96f. No explicit torch pin is installed here, so torch is
# presumably provided by requirements-test.txt -- verify against that file.
install_dep_17_cpu: &install_dep_17_cpu
  - run:
      name: Install Dependencies
      command: |
        sudo apt-get install -y libopenmpi-dev
        pip install --progress-bar off -r requirements-test.txt
        pip install --progress-bar off git+https://github.com/msbaines/torch_pg.git@c85c96f#egg=torch-pg
        python -c 'import torch; print("Torch version:", torch.__version__)'
        python -m torch.utils.collect_env

# Installs the repository as a regular (non-editable) package, then imports
# fairscale with the first sys.path entry dropped so the import is not
# satisfied by the first search-path entry.
install_repo_cpu: &install_repo_cpu
  - run:
      name: Install Repository
      command: |
        pip install .
        # Test import.
        python -c 'import sys; sys.path = sys.path[1:]; import fairscale'

# Installs the repository in editable mode with CUDA_HOME pointing at the
# CUDA 10.1 toolkit directory.
install_repo_gpu: &install_repo_gpu
  - run:
      name: Install Repository
      command: |
        export CUDA_HOME=/usr/local/cuda-10.1
        pip install -e .

# Runs pytest with an XML coverage report and a 60-second timeout option,
# then uploads coverage.xml with the Codecov bash uploader.
# NOTE(review): the uploader script is fetched and executed unpinned at run
# time; consider pinning or verifying it.
run_coverage: &run_coverage
  - run:
      name: Run Unit Tests With Coverage
      command: |
        pytest --cov-report=xml --cov=./ --timeout 60
        #Uploading test coverage for Python code
        bash <(curl -s https://codecov.io/bash) -f coverage.xml -cF Python

# Runs the pytest suite verbosely with a 60-second timeout option and writes
# a JUnit report to test-results/junit.xml.
run_unittests: &run_unittests
  - run:
      name: Run Unit Tests
      command: |
        pytest --junitxml=test-results/junit.xml --verbose --timeout 60

# Runs the tests under tests/nn/moe with 4 MPI processes, using the
# torch_pg pytest plugin and its --only-mpi option.
# The JUnit report goes to its own file so it does not overwrite the
# test-results/junit.xml written by the regular unit-test step when both
# steps run in the same job.
run_mpi_unittests: &run_mpi_unittests
  - run:
      name: Run MPI Unit Tests
      command: |
        mpirun -n 4 python -m pytest -p torch_pg.pytest --only-mpi --junitxml=test-results/junit-mpi.xml --verbose tests/nn/moe

# Runs flake8, printing the offending source lines and per-error statistics.
run_flake8: &run_flake8
  - run:
      name: Run Linter (flake8)
      command: |
        flake8 --show-source --statistics

# Runs the benchmarks/pipe.py script.
run_pipe_benchmark: &run_pipe_benchmark
  - run:
      name: Run Pipe Benchmark
      command: |
        python benchmarks/pipe.py

# Runs benchmarks/oss.py twice: first for 2 epochs with world size 4, then
# with --check_regression for oss_sharded_ddp against the reference speed,
# memory and loss values passed on the command line.
run_oss_benchmark: &run_oss_benchmark
  - run:
      name: Run OSS Benchmark
      command: |
        python benchmarks/oss.py --world_size 4 --epochs 2
        python benchmarks/oss.py --check_regression --world_size 4 --optim_type oss_sharded_ddp --reference_speed 660 --reference_memory 930 --reference_loss 0.023

# Runs benchmarks/oss.py with the --gloo flag for 2 epochs, once per
# optimizer type (oss_ddp, then oss_sharded_ddp).
run_oss_gloo: &run_oss_gloo
  - run:
      name: Run OSS with Gloo
      command: |
        python benchmarks/oss.py --gloo --optim_type oss_ddp --epochs 2
        python benchmarks/oss.py --gloo --optim_type oss_sharded_ddp --epochs 2

# Runs benchmarks/oss.py with the --amp flag for 3 epochs using the
# oss_sharded_ddp optimizer type.
run_oss_amp: &run_oss_amp
  - run:
      name: Run OSS with Torch AMP
      command: |
        python benchmarks/oss.py --amp --epochs 3 --optim_type oss_sharded_ddp

# -------------------------------------------------------------------------------------
# Jobs to run
# -------------------------------------------------------------------------------------

jobs:
  # CPU job: installs dependencies and the repository, runs the linters
  # (isort, black, flake8) and mypy, then the unit tests and MPI unit tests.
  cpu_tests:
    <<: *cpu

    working_directory: ~/fairscale

    steps:
      - checkout
      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
            - cache-key-cpu-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_dep_17_cpu

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-cpu-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_repo_cpu

      - run:
          name: Run Linter (isort)
          command: |
            isort . --check

      - run:
          name: Run Linter (black)
          command: |
            black --check .

      - run:
          name: Run type-checking (mypy)
          command: |
            mypy --ignore-missing-imports --scripts-are-modules --pretty .

      - <<: *run_flake8

      - <<: *run_unittests

      - <<: *run_mpi_unittests

      - store_test_results:
          path: test-results

  # GPU job: unit tests against the torch 1.5.1 dependency set.
  gpu_tests_15:
    <<: *gpu

    working_directory: ~/fairscale

    steps:
      - checkout

      - run: nvidia-smi

      - run: pyenv global 3.7.0

      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
            - cache-key-gpu15-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_dep_15

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-gpu15-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_repo_gpu

      - <<: *run_unittests

      - store_test_results:
          path: test-results

  # GPU job: unit tests against the torch 1.6.0 dependency set, followed by
  # the coverage run and upload.
  gpu_tests_16:
    <<: *gpu

    working_directory: ~/fairscale

    steps:
      - checkout

      - run: nvidia-smi

      - run: pyenv global 3.7.0

      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
            - cache-key-gpu16-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_dep_16

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-gpu16-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_repo_gpu

      - <<: *run_unittests

      - <<: *run_coverage

      - store_test_results:
          path: test-results

  # GPU job: unit tests against the torch 1.7.0 dependency set.
  gpu_tests_17:
    <<: *gpu

    working_directory: ~/fairscale

    steps:
      - checkout

      - run: nvidia-smi

      - run: pyenv global 3.7.0

      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
            - cache-key-gpu17-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_dep_17

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-gpu17-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_repo_gpu

      - <<: *run_unittests

      - store_test_results:
          path: test-results

  # GPU job: runs the pipe and OSS benchmarks against the torch 1.7.0
  # dependency set.
  benchmarks:
    <<: *gpu

    working_directory: ~/fairscale

    steps:
      - checkout

      - run: nvidia-smi

      # Python 3.7.0 is force-removed and reinstalled before being selected.
      # NOTE(review): the reason for the reinstall is not visible in this
      # file -- confirm it is still needed.
      - run: pyenv uninstall -f 3.7.0

      - run: pyenv install 3.7.0

      - run: pyenv global 3.7.0

      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
            - cache-key-benchmarks-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_dep_17

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-benchmarks-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_repo_gpu

      - <<: *run_pipe_benchmark

      - <<: *run_oss_benchmark

      - <<: *run_oss_gloo

      - <<: *run_oss_amp

# Single "build" workflow listing every job defined in this file; no
# dependencies between the jobs are declared.
workflows:
  version: 2
  build:
    jobs:
      - cpu_tests
      - gpu_tests_15
      - gpu_tests_16
      - gpu_tests_17
      - benchmarks