# Python CircleCI 2.0 configuration file
#
# Check https://circleci.com/docs/2.0/language-python/ for more details
#
# Adapted from
# https://github.com/facebookresearch/detectron2/blob/master/.circleci/config.yml

version: 2

# -------------------------------------------------------------------------------------
# Environments to run the jobs in
# -------------------------------------------------------------------------------------
# Executor settings shared by CPU-only jobs: a Docker container running the
# Python 3.7 image on a medium resource class.
cpu: &cpu
  resource_class: medium
  docker:
    - image: circleci/python:3.7

# Executor settings shared by GPU jobs: a machine image with CUDA 10.1 on a
# gpu.large resource class. CUDA_VERSION is exported to every step.
gpu: &gpu
  resource_class: gpu.large
  machine:
    image: ubuntu-1604-cuda-10.1:201909-23
  environment:
    CUDA_VERSION: "10.1"

# -------------------------------------------------------------------------------------
# Re-usable commands
# -------------------------------------------------------------------------------------
# Creates a virtualenv in ~/venv, activates it for the current step, and
# appends the activation line to $BASH_ENV so later steps pick it up too.
# Also prints the interpreter/pip in use and upgrades pip.
setup_venv: &setup_venv
  - run:
      name: Setup Virtual Env
      working_directory: ~/
      command: |
        python -m venv ~/venv
        echo ". ~/venv/bin/activate" >> $BASH_ENV
        . ~/venv/bin/activate
        python --version
        which python
        which pip
        pip install --upgrade pip

# Installs test dependencies against torch 1.5.1 (CUDA 10.1 wheel):
# OpenMPI headers, the pinned torch wheel, then requirements-test.txt.
# The last two commands print the torch version and environment for debugging.
install_dep_15: &install_dep_15
  - run:
      name: Install Dependencies
      command: |
        sudo apt-get install -y libopenmpi-dev
        pip install --progress-bar off torch==1.5.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html
        pip install --progress-bar off -r requirements-test.txt
        python -c 'import torch; print("Torch version:", torch.__version__)'
        python -m torch.utils.collect_env

# Installs test dependencies against torch 1.6.0 (CUDA 10.1 wheel):
# OpenMPI headers, the pinned torch wheel, requirements-test.txt, and the
# torch_pg package pinned to a specific git commit.
# The last two commands print the torch version and environment for debugging.
install_dep_16: &install_dep_16
  - run:
      name: Install Dependencies
      command: |
        sudo apt-get install -y libopenmpi-dev
        pip install --progress-bar off torch==1.6.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html
        pip install --progress-bar off -r requirements-test.txt
        pip install --progress-bar off git+https://github.com/msbaines/torch_pg.git@c85c96f#egg=torch-pg
        python -c 'import torch; print("Torch version:", torch.__version__)'
        python -m torch.utils.collect_env

# Installs test dependencies against torch 1.7.0 for the CPU job.
# Order: OpenMPI headers, the pinned torch wheel, requirements-test.txt, then
# torch_pg pinned to a specific git commit.
# Note that this installs the same +cu101 wheel that the GPU variant uses.
install_dep_17_cpu: &install_dep_17_cpu
  - run:
      name: Install Dependencies
      command: |
        sudo apt-get install -y libopenmpi-dev
        pip install --progress-bar off torch==1.7.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html
        pip install --progress-bar off -r requirements-test.txt
        pip install --progress-bar off git+https://github.com/msbaines/torch_pg.git@c85c96f#egg=torch-pg
        python -c 'import torch; print("Torch version:", torch.__version__)'
        python -m torch.utils.collect_env

# Installs test dependencies against torch 1.7.0 (CUDA 10.1 wheel) for GPU jobs.
# Unlike the other install_dep_* commands, requirements-test.txt is installed
# BEFORE the pinned torch wheel, so the pinned wheel replaces whatever torch
# version the requirements pulled in.
install_dep_17_gpu: &install_dep_17_gpu
  # FIXME: remove this ordering workaround once torch 1.7.1 is handled properly.
  # Short-term fix: override the torch version that pip installs by default.
  - run:
      name: Install Dependencies
      command: |
        sudo apt-get install -y libopenmpi-dev
        pip install --progress-bar off -r requirements-test.txt
        pip install --progress-bar off torch==1.7.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html
        pip install --progress-bar off git+https://github.com/msbaines/torch_pg.git@c85c96f#egg=torch-pg
        python -c 'import torch; print("Torch version:", torch.__version__)'
        python -m torch.utils.collect_env

# Installs the repository as a regular (non-editable) package and checks that
# `fairscale` can be imported.
# The import check drops the first sys.path entry first, presumably so the
# import resolves to the installed package rather than the checkout — confirm.
install_repo_cpu: &install_repo_cpu
  - run:
      name: Install Repository
      command: |
        pip install .
        # Test import.
        python -c 'import sys; sys.path = sys.path[1:]; import fairscale'

# Installs the repository in editable mode for GPU jobs, with CUDA_HOME
# pointing at the CUDA 10.1 toolkit for the duration of the install.
install_repo_gpu: &install_repo_gpu
  - run:
      name: Install Repository
      command: |
        export CUDA_HOME=/usr/local/cuda-10.1
        pip install -e .

# Runs the test suite under pytest-cov, writing an XML coverage report, then
# uploads coverage.xml to Codecov with the "Python" flag.
# NOTE(review): the upload pipes a script fetched with curl straight into bash,
# and Codecov has deprecated the bash uploader — confirm and consider migrating
# to the current uploader with checksum verification.
run_coverage: &run_coverage
  - run:
      name: Run Unit Tests With Coverage
      command: |
        pytest --cov-report=xml --cov=./
        #Uploading test coverage for Python code
        bash <(curl -s https://codecov.io/bash) -f coverage.xml -cF Python

# Runs the unit tests with pytest and writes a JUnit XML report to
# test-results/junit.xml.
run_unittests: &run_unittests
  - run:
      name: Run Unit Tests
      command: |
        pytest --junitxml=test-results/junit.xml --verbose

# Runs the tests under tests/nn/moe with mpirun on 4 processes, using the
# torch_pg pytest plugin and its --only-mpi option.
# The JUnit report goes to its own file (junit-mpi.xml) so it does not
# overwrite the junit.xml written by the "Run Unit Tests" step of the same job.
# NOTE(review): all 4 processes are given the same --junitxml path; confirm
# whether they race on that file.
run_mpi_unittests: &run_mpi_unittests
  - run:
      name: Run MPI Unit Tests
      command: |
        mpirun -n 4 python -m pytest -p torch_pg.pytest --only-mpi --junitxml=test-results/junit-mpi.xml --verbose tests/nn/moe

# Runs flake8, printing the offending source line for each finding and a
# summary count per error code.
run_flake8: &run_flake8
  - run:
      name: Run Linter (flake8)
      command: |
        flake8 --show-source --statistics

# Runs the pipe benchmark script with its default arguments.
run_pipe_benchmark: &run_pipe_benchmark
  - run:
      name: Run Pipe Benchmark
      command: |
        python benchmarks/pipe.py

# Runs the OSS benchmark twice with world size 4:
#   1. a 2-epoch run with default settings;
#   2. a regression check for oss_sharded_ddp against reference speed,
#      memory and loss values.
run_oss_benchmark: &run_oss_benchmark
  - run:
      name: Run OSS Benchmark
      command: |
        python benchmarks/oss.py --world_size 4 --epochs 2
        python benchmarks/oss.py --check_regression --world_size 4 --optim_type oss_sharded_ddp --reference_speed 660 --reference_memory 930 --reference_loss 0.023

# Runs the OSS benchmark with the --gloo flag for 2 epochs, once per
# optimizer type (oss_ddp, then oss_sharded_ddp).
run_oss_gloo: &run_oss_gloo
  - run:
      name: Run OSS with Gloo
      command: |
        python benchmarks/oss.py --gloo --optim_type oss_ddp --epochs 2
        python benchmarks/oss.py --gloo --optim_type oss_sharded_ddp --epochs 2

# Runs the OSS benchmark with the --amp flag for 3 epochs using
# oss_sharded_ddp.
# The sequence is indented like every other re-usable command in this file.
run_oss_amp: &run_oss_amp
  - run:
      name: Run OSS with Torch AMP
      command: |
        python benchmarks/oss.py --amp --epochs 3 --optim_type oss_sharded_ddp

# -------------------------------------------------------------------------------------
# Jobs to run
# -------------------------------------------------------------------------------------

jobs:
  # CPU job: sets up the venv, installs the torch 1.7 dependencies and the
  # repository, then runs the linters (isort, black, flake8), mypy, the unit
  # tests and the MPI unit tests, and finally publishes the test results.
  cpu_tests:
    <<: *cpu

    working_directory: ~/fairscale

    steps:
      - checkout
      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
            - cache-key-cpu-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_dep_17_cpu

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-cpu-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_repo_cpu

      - run:
          name: Run Linter (isort)
          command: |
            isort . --check

      - run:
          name: Run Linter (black)
          command: |
            black --check .

      - run:
          name: Run type-checking (mypy)
          command: |
            mypy --ignore-missing-imports --scripts-are-modules --pretty .

      - <<: *run_flake8

      - <<: *run_unittests

      - <<: *run_mpi_unittests

      - store_test_results:
          path: test-results

  # GPU job for torch 1.5: selects Python 3.7.0 via pyenv, installs the
  # torch 1.5.1 dependencies and the repository, then runs the unit tests.
  gpu_tests_15:
    <<: *gpu

    working_directory: ~/fairscale

    steps:
      - checkout

      - run: nvidia-smi

      - run: pyenv global 3.7.0

      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
            - cache-key-gpu15-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_dep_15

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-gpu15-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_repo_gpu

      - <<: *run_unittests

      - store_test_results:
          path: test-results

  # GPU job for torch 1.6: selects Python 3.7.0 via pyenv, installs the
  # torch 1.6.0 dependencies and the repository, then runs the unit tests.
  # This job also runs the coverage step.
  gpu_tests_16:
    <<: *gpu

    working_directory: ~/fairscale

    steps:
      - checkout

      - run: nvidia-smi

      - run: pyenv global 3.7.0

      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
            - cache-key-gpu16-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_dep_16

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-gpu16-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_repo_gpu

      - <<: *run_unittests

      - <<: *run_coverage

      - store_test_results:
          path: test-results

  # GPU job for torch 1.7: selects Python 3.7.0 via pyenv, installs the
  # torch 1.7.0 dependencies and the repository, then runs the unit tests.
  gpu_tests_17:
    <<: *gpu

    working_directory: ~/fairscale

    steps:
      - checkout

      - run: nvidia-smi

      - run: pyenv global 3.7.0

      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
            - cache-key-gpu17-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_dep_17_gpu

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-gpu17-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_repo_gpu

      - <<: *run_unittests

      - store_test_results:
          path: test-results

  # GPU benchmark job: reinstalls Python 3.7.0 through pyenv, installs the
  # torch 1.7.0 dependencies and the repository, then runs the pipe benchmark
  # and the OSS benchmarks (default, Gloo and AMP variants).
  benchmarks:
    <<: *gpu

    working_directory: ~/fairscale

    steps:
      - checkout

      - run: nvidia-smi

      # Force a fresh install of the interpreter before selecting it.
      - run: pyenv uninstall -f 3.7.0

      - run: pyenv install 3.7.0

      - run: pyenv global 3.7.0

      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
            - cache-key-benchmarks-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_dep_17_gpu

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-benchmarks-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_repo_gpu

      - <<: *run_pipe_benchmark

      - <<: *run_oss_benchmark

      - <<: *run_oss_gloo

      - <<: *run_oss_amp

# Single "build" workflow that runs every job defined in this file.
workflows:
  version: 2
  build:
    jobs:
      - cpu_tests
      - gpu_tests_15
      - gpu_tests_16
      - gpu_tests_17
      - benchmarks