config.yml 20.4 KB
Newer Older
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
1
2
3
4
5
6
# Python CircleCI 2.0 configuration file
#
# Check https://circleci.com/docs/2.0/language-python/ for more details
#
# Adopted from
# https://github.com/facebookresearch/detectron2/blob/master/.circleci/config.yml
7
8
#
# Pro tip: install the CircleCI CLI to validate this config locally during development.
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
9

Min Xu's avatar
Min Xu committed
10
version: 2.1
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
11
12
13
14

# -------------------------------------------------------------------------------------
# Environments to run the jobs in
# -------------------------------------------------------------------------------------
15
# Docker executor for CPU jobs: Python 3.7 image on a medium resource class.
cpu_py37: &cpu_py37
  resource_class: medium
  docker:
    - image: circleci/python:3.7

20
21
22
23
24
25
26
27
28
29
# Docker executor for CPU jobs: Python 3.8 image on a medium resource class.
cpu_py38: &cpu_py38
  resource_class: medium
  docker:
    - image: circleci/python:3.8

# Docker executor for CPU jobs: Python 3.9 image on a medium resource class.
cpu_py39: &cpu_py39
  resource_class: medium
  docker:
    - image: circleci/python:3.9

30
31
# Here is the list of available GPU images:
#   https://circleci.com/docs/2.0/configuration-reference/#available-linux-gpu-images
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
32
33
34
35
36
37
38
# Machine executor for GPU jobs using the CUDA 10.1 image.
gpu: &gpu
  machine:
    image: ubuntu-1604-cuda-10.1:201909-23
  resource_class: gpu.large
  environment:
    # Quoted so the value stays a string instead of being parsed as a float.
    CUDA_VERSION: "10.1"

39
40
41
42
43
44
45
# Machine executor for GPU jobs using the CUDA 11.1 image.
gpu_cu111: &gpu_cu111
  machine:
    image: ubuntu-1604-cuda-11.1:202012-01
  resource_class: gpu.large
  environment:
    # Quoted so the value stays a string instead of being parsed as a float.
    CUDA_VERSION: "11.1"

Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# -------------------------------------------------------------------------------------
# Re-usable commands
# -------------------------------------------------------------------------------------
# Creates a virtualenv at ~/venv, appends its activation line to $BASH_ENV and
# activates it for the rest of this step. It then prints the Python version and
# the resolved `python` / `pip` paths for debugging, and upgrades pip.
# The step runs from the home directory (working_directory: ~/).
setup_venv: &setup_venv
  - run:
      name: Setup Virtual Env
      working_directory: ~/
      command: |
        python -m venv ~/venv
        echo ". ~/venv/bin/activate" >> $BASH_ENV
        . ~/venv/bin/activate
        python --version
        which python
        which pip
        pip install --upgrade pip

62
# Installs libopenmpi-dev, then torch 1.5.1 / torchvision 0.6.1 (cu101 wheels)
# and the test and benchmark requirements. The pip installs are skipped when
# check_version.py already exists in the venv and succeeds for `torch eq 1.5`.
# The last command downloads check_version.py into the venv.
install_dep_151: &install_dep_151
  - run:
      name: Install Dependencies with torch 1.5.1
      command: |
        sudo apt-get install -y libopenmpi-dev
        # check if we have restored venv cache (/home/circleci/venv) correctly, if so, just skip
        if [ -f /home/circleci/venv/check_version.py ]; then python /home/circleci/venv/check_version.py torch eq 1.5 && exit 0; fi
        # start installing
        pip install --progress-bar off torch==1.5.1+cu101 torchvision==0.6.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html
        pip install --progress-bar off -r requirements-test.txt
        pip install --progress-bar off -r requirements-benchmarks.txt
        python -c 'import torch; print("Torch version:", torch.__version__)'
        python -c 'import torch; assert torch.__version__.split(".")[:2] == ["1", "5"], "wrong torch version"'
        python -m torch.utils.collect_env
        wget -O /home/circleci/venv/check_version.py https://raw.githubusercontent.com/min-xu-ai/check_verion/main/check_version.py
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
77

78
# Installs libopenmpi-dev, then torch 1.6.0 / torchvision 0.7.0 (cu101 wheels),
# the test and benchmark requirements, and the pinned torch_pg commit. The pip
# installs are skipped when check_version.py already exists in the venv and
# succeeds for `torch eq 1.6`. The last command downloads check_version.py
# into the venv.
install_dep_160: &install_dep_160
  - run:
      name: Install Dependencies with torch 1.6.0
      command: |
        sudo apt-get install -y libopenmpi-dev
        # check if we have restored venv cache (/home/circleci/venv) correctly, if so, just skip
        if [ -f /home/circleci/venv/check_version.py ]; then python /home/circleci/venv/check_version.py torch eq 1.6 && exit 0; fi
        # start installing
        pip install --progress-bar off torch==1.6.0+cu101 torchvision==0.7.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html
        pip install --progress-bar off -r requirements-test.txt
        pip install --progress-bar off -r requirements-benchmarks.txt
        pip install --progress-bar off git+https://github.com/msbaines/torch_pg.git@c85c96f#egg=torch-pg
        python -c 'import torch; print("Torch version:", torch.__version__)'
        python -c 'import torch; assert torch.__version__.split(".")[:2] == ["1", "6"], "wrong torch version"'
        python -m torch.utils.collect_env
        wget -O /home/circleci/venv/check_version.py https://raw.githubusercontent.com/min-xu-ai/check_verion/main/check_version.py
94

95
# Installs libopenmpi-dev, then torch 1.7.1 / torchvision 0.8.2 (cu101 wheels),
# the test and benchmark requirements, and the pinned torch_pg commit. The pip
# installs are skipped when check_version.py already exists in the venv and
# succeeds for `torch eq 1.7`. The last command downloads check_version.py
# into the venv.
install_dep_171: &install_dep_171
  - run:
      name: Install Dependencies with torch 1.7.1
      command: |
        sudo apt-get install -y libopenmpi-dev
        # check if we have restored venv cache (/home/circleci/venv) correctly, if so, just skip
        if [ -f /home/circleci/venv/check_version.py ]; then python /home/circleci/venv/check_version.py torch eq 1.7 && exit 0; fi
        # start installing
        pip install --progress-bar off torch==1.7.1+cu101 torchvision==0.8.2+cu101 -f https://download.pytorch.org/whl/torch_stable.html
        pip install --progress-bar off -r requirements-test.txt
        pip install --progress-bar off -r requirements-benchmarks.txt
        pip install --progress-bar off git+https://github.com/msbaines/torch_pg.git@c85c96f#egg=torch-pg
        python -c 'import torch; print("Torch version:", torch.__version__)'
        python -c 'import torch; assert torch.__version__.split(".")[:2] == ["1", "7"], "wrong torch version"'
        python -m torch.utils.collect_env
        wget -O /home/circleci/venv/check_version.py https://raw.githubusercontent.com/min-xu-ai/check_verion/main/check_version.py
111

112
113
114
115
116
117
118
# Same flow as the other torch 1.7.1 install step, but with the cu110 wheels.
# It also enables the `universe` apt repository and refreshes the package
# index before installing libopenmpi-dev. The pip installs are skipped when
# check_version.py already exists in the venv and succeeds for `torch eq 1.7`.
install_dep_171_cu110: &install_dep_171_cu110
  - run:
      name: Install Dependencies with torch 1.7.1+cu110
      command: |
        sudo add-apt-repository universe
        sudo apt-get update
        sudo apt-get install -y libopenmpi-dev
        # check if we have restored venv cache (/home/circleci/venv) correctly, if so, just skip
        if [ -f /home/circleci/venv/check_version.py ]; then python /home/circleci/venv/check_version.py torch eq 1.7 && exit 0; fi
        # start installing
        pip install --progress-bar off torch==1.7.1+cu110 torchvision==0.8.2+cu110 -f https://download.pytorch.org/whl/torch_stable.html
        pip install --progress-bar off -r requirements-test.txt
        pip install --progress-bar off -r requirements-benchmarks.txt
        pip install --progress-bar off git+https://github.com/msbaines/torch_pg.git@c85c96f#egg=torch-pg
        python -c 'import torch; print("Torch version:", torch.__version__)'
        python -c 'import torch; assert torch.__version__.split(".")[:2] == ["1", "7"], "wrong torch version"'
        python -m torch.utils.collect_env
        wget -O /home/circleci/venv/check_version.py https://raw.githubusercontent.com/min-xu-ai/check_verion/main/check_version.py
130

131
132
133
134
135
# Installs libopenmpi-dev, the test requirements, and pinned nightly builds of
# torchvision and torch 1.8.0 (cu112), plus torchtext and a torch_pg fork.
# requirements-benchmarks.txt is deliberately not installed here. The pip
# installs are skipped when check_version.py already exists in the venv and
# succeeds for `torch eq 1.8`.
install_dep_180: &install_dep_180
  - run:
      name: Install Dependencies with torch 1.8.0 nightly
      command: |
        sudo apt-get install -y libopenmpi-dev
        # check if we have restored cache correctly, if so, just skip
        if [ -f /home/circleci/venv/check_version.py ]; then python /home/circleci/venv/check_version.py torch eq 1.8 && exit 0; fi
        # start installing
        pip install --progress-bar off -r requirements-test.txt
        # Since we are using nightly builds, we bypass the benchmarks req file
        # and install ourselves for testing.
        #pip install --progress-bar off -r requirements-benchmarks.txt
        # torchvision nightly wants torch 1.9.
        pip install --pre --progress-bar off torchtext==0.6.0 \
          torchvision==0.9.0.dev20210222+cu112 \
          -f https://download.pytorch.org/whl/nightly/cu112/torch_nightly.html
        # we only use it a bit in benchmarking, so it might be safe to use 1.8.
        pip install --pre --progress-bar off torch==1.8.0.dev20210210+cu112 \
          -f https://download.pytorch.org/whl/nightly/cu112/torch_nightly.html
        pip install --progress-bar off  git+https://github.com/min-xu-ai/torch_pg.git@c723ab4#egg=torch-pg
        python -c 'import torch; print("Torch version:", torch.__version__)'
        python -c 'import torch; assert torch.__version__.split(".")[:2] == ["1", "8"], "wrong torch version"'
        python -m torch.utils.collect_env
        wget -O /home/circleci/venv/check_version.py https://raw.githubusercontent.com/min-xu-ai/check_verion/main/check_version.py
155

Jun Ru Anderson's avatar
Jun Ru Anderson committed
156
# Installs the repository with a regular (non-editable) pip install and then
# imports `fairscale` with the first sys.path entry removed -- presumably so
# the import resolves to the installed package rather than the checkout.
install_repo_cpu: &install_repo_cpu
  - run:
      name: Install Repository
      command: |
        pip install .
        # Test import.
        python -c 'import sys; sys.path = sys.path[1:]; import fairscale'
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
163

Jun Ru Anderson's avatar
Jun Ru Anderson committed
164
165
166
167
168
# Points CUDA_HOME at the CUDA 10.1 toolkit and installs the repository in
# editable mode.
install_repo_gpu: &install_repo_gpu
  - run:
      name: Install Repository
      command: |
        export CUDA_HOME=/usr/local/cuda-10.1
        pip install -e .
msbaines's avatar
msbaines committed
170

171
172
173
174
175
176
177
# Points CUDA_HOME at the CUDA 11.1 toolkit and installs the repository in
# editable mode.
install_repo_gpu_cu111: &install_repo_gpu_cu111
  - run:
      name: Install Repository
      command: |
        export CUDA_HOME=/usr/local/cuda-11.1
        pip install -e .

178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202

# Import-order lint: isort in check-only mode over the whole checkout.
run_isort: &run_isort
  - run:
      name: Run Linter (isort)
      command: |
        isort . --check

# Formatting lint: black in check-only mode over the whole checkout.
run_black: &run_black
  - run:
      name: Run Linter (black)
      command: |
        black --check .

# Static type check: mypy over the whole checkout, ignoring missing imports.
run_mypy: &run_mypy
  - run:
      name: Run type-checking (mypy)
      command: |
        mypy --ignore-missing-imports --scripts-are-modules --pretty .

# Style lint: flake8, printing the offending source lines and a summary.
run_flake8: &run_flake8
  - run:
      name: Run Linter (flake8)
      command: |
        flake8 --show-source --statistics

203
204
205
206
207
208
# Runs the repository's tests/ci_test_list_check.sh script to validate the
# unit test list files.
check_test_list: &check_test_list
  - run:
      name: Verify that unit test list files are correct
      command: |
        bash ./tests/ci_test_list_check.sh

209

Min Xu's avatar
Min Xu committed
210
211
212
213
# TODO (Min): figure out how to do coverage nightly or on-demand. Doing it
# on every commit seems like an overkill since we can easily figure out which
# code is not covered without looking at coverage results from each commit.
# Also, it is a long pole for testing time, which slows down development a lot.
msbaines's avatar
msbaines committed
214
215
216
217
# Runs the unit tests with coverage enabled and uploads coverage.xml to Codecov.
# NOTE(review): the uploader script is fetched with curl and executed without
# any integrity check -- consider pinning or verifying it.
# NOTE(review): no job in the reviewed portion of this file references this step.
run_coverage: &run_coverage
  - run:
      name: Run Unit Tests With Coverage
      command: |
        pytest --junitxml=test-results/junit.xml --verbose --timeout 60 --cov-report=xml --cov=./
        #Uploading test coverage for Python code
        bash <(curl -s https://codecov.io/bash) -f coverage.xml -cF Python

222
223
224
225
# Runs the MPI-only tests under tests/nn/moe across 4 MPI processes.
# The JUnit report goes to its own file: the CPU jobs run the regular unit
# test step first, and writing to test-results/junit.xml again would overwrite
# that report before store_test_results collects the directory.
run_mpi_unittests: &run_mpi_unittests
  - run:
      name: Run MPI Unit Tests
      command: |
        mpirun -n 4 python -m pytest -p torch_pg.pytest --only-mpi --junitxml=test-results/junit-mpi.xml --verbose tests/nn/moe
227

Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
228

Jun Ru Anderson's avatar
Jun Ru Anderson committed
229
# Runs the pipe benchmark script with its default options.
run_pipe_benchmark: &run_pipe_benchmark
  - run:
      name: Run Pipe Benchmark
      command: |
        python benchmarks/pipe.py
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
234

235
236
237
238
# Runs the pipe benchmark script in multiprocess mode with lazy construction.
run_mp_pipe_benchmark: &run_mp_pipe_benchmark
  - run:
      name: Run Multiprocess Pipe Benchmark
      command: |
        python benchmarks/pipe.py --multiprocess --lazy-construction
240

241
242
243
244
# Runs the OSS benchmark twice with a world size of 4: first for 2 epochs,
# then with --check_regression for the oss_sharded_ddp optimizer type.
run_oss_benchmark: &run_oss_benchmark
  - run:
      name: Run OSS Benchmark
      command: |
        python benchmarks/oss.py --world_size 4 --epochs 2
        python benchmarks/oss.py --check_regression --world_size 4 --optim_type oss_sharded_ddp
247
248

# Runs the OSS benchmark with --gloo for 2 epochs, once per optimizer type
# (oss_ddp, then oss_sharded_ddp).
run_oss_gloo: &run_oss_gloo
  - run:
      name: Run OSS with Gloo
      command: |
        python benchmarks/oss.py --gloo --optim_type oss_ddp --epochs 2
        python benchmarks/oss.py --gloo --optim_type oss_sharded_ddp --epochs 2

255
# Runs the OSS benchmark with --amp for 3 epochs using oss_sharded_ddp.
run_oss_amp: &run_oss_amp
  - run:
      name: Run OSS with Torch AMP
      command: |
        python benchmarks/oss.py --amp --epochs 3 --optim_type oss_sharded_ddp

261
262
263
264
265
266
# Runs the OSS benchmark with --amp and --multi_tensor_optim for 3 epochs
# using oss_sharded_ddp.
run_oss_for_each: &run_oss_for_each
  - run:
      name: Run OSS with Torch AMP and ForEach optimizer
      command: |
        python benchmarks/oss.py --amp --epochs 3 --optim_type oss_sharded_ddp --multi_tensor_optim

267
268
269
270
271
272
273
274
275
276

# Builds the docs as a single HTML page from the docs/ directory and fails the
# step when the last lines of the build output contain the word "warning".
run_doc_build: &run_doc_build
  - run:
      name: Testing doc build
      command: |
        cd docs
        pip install --progress-bar off -r requirements.txt
        make help
        make singlehtml | tee make.out
        ! tail make.out | grep -q warning
277

278
279
280
281
282
283
284
285
# Alias that runs every unit test available on the platform.
run_unittests: &run_unittests
  - run:
      name: Run all unit tests.
      # No `-x` here: on CPU the whole suite runs instead of stopping at the
      # first failure, since docker time is cheaper.
      command: |
        pytest --junitxml=test-results/junit.xml --verbose --timeout 60

Min Xu's avatar
Min Xu committed
286
commands:
  # Reusable command (like a function) that runs the tests named in a list file.
  #
  # Parameters:
  #   test_list_file: path to a file whose contents are passed to pytest as
  #     arguments. The default is a path that is expected not to exist, so a
  #     caller that forgets the parameter gets a failing step.
  run_unittests_from_list:
    parameters:
      test_list_file:
        type: string
        default: "/dev/non_exist"  # Default to error out
    steps:
      - run:
          name: Run Unit Tests
          # we use pytest -x so that it stops on first failure to save GPU time, which is expensive.
          command: |
            # Fail with a clear message rather than a bare non-zero exit.
            if [ ! -f "<<parameters.test_list_file>>" ]; then
              echo "test list file not found: <<parameters.test_list_file>>" >&2
              exit 1
            fi
            # The substitution is left unquoted on purpose: each entry in the
            # list file must become a separate pytest argument.
            pytest -x --junitxml=test-results/junit.xml --verbose --timeout 60 $(cat "<<parameters.test_list_file>>")
Min Xu's avatar
Min Xu committed
302

Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
303
304
305
306
307
# -------------------------------------------------------------------------------------
# Jobs to run
# -------------------------------------------------------------------------------------

jobs:
308
309
  # CPU job on Python 3.7 with torch 1.7.1: linters, type check, unit tests,
  # MPI tests and the doc build.
  cpu_tests_py37:
    <<: *cpu_py37

    working_directory: ~/fairscale

    steps:
      - checkout
      - <<: *check_test_list
      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
            - cache-key-cpu-py37-171-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_dep_171

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-cpu-py37-171-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_repo_cpu

      # Static checks
      - <<: *run_isort
      - <<: *run_black
      - <<: *run_mypy
      - <<: *run_flake8

      # Tests and docs
      - <<: *run_unittests
      - <<: *run_mpi_unittests
      - <<: *run_doc_build

      - store_test_results:
          path: test-results
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
342

343
344
  # CPU job on Python 3.8 with torch 1.7.1: linters, type check, unit tests,
  # MPI tests and the doc build.
  cpu_tests_py38:
    <<: *cpu_py38

    working_directory: ~/fairscale

    steps:
      - checkout
      - <<: *check_test_list
      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
            - cache-key-cpu-py38-171-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_dep_171

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-cpu-py38-171-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_repo_cpu

      # Static checks
      - <<: *run_isort
      - <<: *run_black
      - <<: *run_mypy
      - <<: *run_flake8

      # Tests and docs
      - <<: *run_unittests
      - <<: *run_mpi_unittests
      - <<: *run_doc_build

      - store_test_results:
          path: test-results

377
378
379
380
381
382
383
  # CPU job on Python 3.9 with the torch 1.8.0 nightly: linters, type check,
  # unit tests, MPI tests and the doc build.
  cpu_tests_py39:
    <<: *cpu_py39

    working_directory: ~/fairscale

    steps:
      - checkout
      - <<: *check_test_list
      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
            - cache-key-cpu-py39-180-3-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      # py3.9 doesn't work well with torch < 1.8. See this PR:
      # https://github.com/pytorch/pytorch/pull/50998
      #
      # Therefore, we test py39 with torch 1.8.0.
      - <<: *install_dep_180

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-cpu-py39-180-3-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_repo_cpu

      # Static checks
      - <<: *run_isort
      - <<: *run_black
      - <<: *run_mypy
      - <<: *run_flake8

      # Tests and docs
      - <<: *run_unittests
      - <<: *run_mpi_unittests
      - <<: *run_doc_build

      - store_test_results:
          path: test-results


417
  # GPU job (CUDA 10.1 image, torch 1.5.1) that runs the tests named in
  # test_list_file. The parameter default is a path expected not to exist.
  gpu_tests_151:
    parameters:
      test_list_file:
        type: string
        default: "/dev/non_exist"

    <<: *gpu

    working_directory: ~/fairscale

    steps:
      - checkout

      - run: nvidia-smi

      - run: pyenv global 3.7.0

      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
            - cache-key-gpu-151-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_dep_151

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-gpu-151-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_repo_gpu

      - run_unittests_from_list:
          test_list_file: <<parameters.test_list_file>>

      - store_test_results:
          path: test-results

456
  # GPU job (CUDA 10.1 image, torch 1.6.0) that runs the tests named in
  # test_list_file. The parameter default is a path expected not to exist.
  gpu_tests_160:
    parameters:
      test_list_file:
        type: string
        default: "/dev/non_exist"

    <<: *gpu

    working_directory: ~/fairscale

    steps:
      - checkout

      - run: nvidia-smi

      - run: pyenv global 3.7.0

      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
            - cache-key-gpu-160-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_dep_160

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-gpu-160-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_repo_gpu

      - run_unittests_from_list:
          test_list_file: <<parameters.test_list_file>>

      - store_test_results:
          path: test-results

495
  # GPU job (CUDA 11.1 image, torch 1.7.1+cu110) that runs the tests named in
  # test_list_file. The parameter default is a path expected not to exist.
  gpu_tests_171:
    parameters:
      test_list_file:
        type: string
        default: "/dev/non_exist"

    <<: *gpu_cu111

    working_directory: ~/fairscale

    steps:
      - checkout

      - run: nvidia-smi

      # Select Python 3.8.6 through pyenv before the venv is created.
      - run: pyenv global 3.8.6

      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
            - cache-key-gpu-cu111-171-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_dep_171_cu110

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-gpu-cu111-171-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

      - <<: *install_repo_gpu_cu111

      - run_unittests_from_list:
          test_list_file: <<parameters.test_list_file>>

      - store_test_results:
          path: test-results

535
  # GPU benchmark job: pipe benchmarks (single and multiprocess) and the OSS
  # AMP, ForEach and Gloo benchmarks, on torch 1.7.1.
  benchmarks_1:
    <<: *gpu

    working_directory: ~/fairscale

    steps:
      - checkout

      - run: nvidia-smi

      # Reinstall Python 3.7.0 under pyenv before selecting it.
      - run: pyenv uninstall -f 3.7.0

      - run: pyenv install 3.7.0

      - run: pyenv global 3.7.0

      - <<: *setup_venv

      # Cache the venv directory that contains dependencies.
      # The key includes requirements-benchmarks.txt because the install step
      # below installs from it; without it, a change to that file would still
      # hit the old cache and the install would be skipped.
      - restore_cache:
          keys:
            - cache-key-benchmarks-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}-{{ checksum "requirements-benchmarks.txt"}}

      # Cache the MNIST directory that contains benchmark data
      - restore_cache:
          keys:
            - cache-key-benchmark-MNIST-{{ checksum "benchmarks/datasets/mnist.py"}}

      - <<: *install_dep_171

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-benchmarks-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}-{{ checksum "requirements-benchmarks.txt"}}

      - <<: *install_repo_gpu

      - <<: *run_pipe_benchmark

      - <<: *run_mp_pipe_benchmark

      - <<: *run_oss_amp

      - <<: *run_oss_for_each

      - <<: *run_oss_gloo

      - save_cache:
          paths:
            - /tmp/MNIST
          key: cache-key-benchmark-MNIST-{{ checksum "benchmarks/datasets/mnist.py"}}

587

588

589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
  # GPU benchmark job: the OSS benchmark (including its regression check) on
  # torch 1.7.1.
  benchmarks_2:
    <<: *gpu

    working_directory: ~/fairscale

    steps:
      - checkout

      - run: nvidia-smi

      # Reinstall Python 3.7.0 under pyenv before selecting it.
      - run: pyenv uninstall -f 3.7.0

      - run: pyenv install 3.7.0

      - run: pyenv global 3.7.0

      - <<: *setup_venv

      # Cache the venv directory that contains dependencies.
      # The key includes requirements-benchmarks.txt because the install step
      # below installs from it; without it, a change to that file would still
      # hit the old cache and the install would be skipped.
      - restore_cache:
          keys:
            - cache-key-benchmarks-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}-{{ checksum "requirements-benchmarks.txt"}}

      # Cache the MNIST directory that contains benchmark data
      - restore_cache:
          keys:
            - cache-key-benchmark-MNIST-{{ checksum "benchmarks/datasets/mnist.py"}}

      - <<: *install_dep_171

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-benchmarks-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}-{{ checksum "requirements-benchmarks.txt"}}

      - <<: *install_repo_gpu

      - <<: *run_oss_benchmark

      - save_cache:
          paths:
            - /tmp/MNIST
          key: cache-key-benchmark-MNIST-{{ checksum "benchmarks/datasets/mnist.py"}}

Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
634
635
636
637
638

# Single workflow that runs every job. Each GPU test job is listed three
# times, once per test list file (lists 1, 2 and 3).
workflows:
  version: 2
  build:
    jobs:
      # CPU jobs
      - cpu_tests_py37
      - cpu_tests_py38
      - cpu_tests_py39

      # GPU jobs, test list 1
      - gpu_tests_151:
          test_list_file: tests/ci_test_list_1.txt
      - gpu_tests_160:
          test_list_file: tests/ci_test_list_1.txt
      - gpu_tests_171:
          test_list_file: tests/ci_test_list_1.txt

      # GPU jobs, test list 2
      - gpu_tests_151:
          test_list_file: tests/ci_test_list_2.txt
      - gpu_tests_160:
          test_list_file: tests/ci_test_list_2.txt
      - gpu_tests_171:
          test_list_file: tests/ci_test_list_2.txt

      # GPU jobs, test list 3
      - gpu_tests_151:
          test_list_file: tests/ci_test_list_3.txt
      - gpu_tests_160:
          test_list_file: tests/ci_test_list_3.txt
      - gpu_tests_171:
          test_list_file: tests/ci_test_list_3.txt

      # Benchmarks
      - benchmarks_1
      - benchmarks_2