config.yml 18.5 KB
Newer Older
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
1
2
3
4
5
# Python CircleCI 2.0 configuration file
#
# Check https://circleci.com/docs/2.0/language-python/ for more details
#
# Adopted from
6
# https://github.com/facebookresearch/detectron2/blob/main/.circleci/config.yml
7
8
#
# Pro tip: install the CircleCI CLI to validate this config locally during development.
9
10
11
12
13
14
15
16
#
# To reset/clean the cache update the CACHE_VERSION variable in project settings
# in the fairscale project in CircleCI. The CACHE_VERSION follows the convention
# v${FAIRSCALE_VERSION}-${CACHE_NUMBER}. E.g. v0.4.2-1. CACHE_NUMBER must start
# at 1 and increase in whole numbers. When changing the CACHE_VERSION manually
# always set the FAIRSCALE_VERSION value to the fairscale version being tested.
# To reset the cache when not updating the fairscale version, only update the
# CACHE_NUMBER value.
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
17

Min Xu's avatar
Min Xu committed
18
version: 2.1
19
20
orbs:
  codecov: codecov/codecov@1.0.2
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
21
22
23
# -------------------------------------------------------------------------------------
# Environments to run the jobs in
# -------------------------------------------------------------------------------------
24
# CPU executor: Docker container with Python 3.7 on the "large" resource class.
cpu_py37: &cpu_py37
  resource_class: large
  docker:
    # The 3.7 tag provided Python 3.7.12 when this was written.
    - image: circleci/python:3.7
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
29

30
31
# CPU executor: Docker container with Python 3.8 on the "large" resource class.
cpu_py38: &cpu_py38
  resource_class: large
  docker:
    # The 3.8 tag provided Python 3.8.12 when this was written.
    - image: circleci/python:3.8
35
36
37

# CPU executor: Docker container with Python 3.9 on the "large" resource class.
cpu_py39: &cpu_py39
  resource_class: large
  docker:
    # The 3.9 tag provided Python 3.9.7 when this was written.
    - image: circleci/python:3.9
41

42
# Here is the list of GPU images:
Anupam Bhatnagar's avatar
Anupam Bhatnagar committed
43
# https://circleci.com/docs/2.0/configuration-reference/#available-linux-gpu-images
44
45
# We need to use multiple gpus for several jobs. The resource_class
# values are available here T101565170
Anupam Bhatnagar's avatar
Anupam Bhatnagar committed
46
47
48
49
# gpu.nvidia.small.multi = 2 gpus with 16 GB ram each
# gpu.nvidia.medium.multi = 4 gpus with 16 GB ram each

# GPU executor: CUDA 11.2 machine image on the gpu.nvidia.small.multi class.
gpu_cu_11_2_small_multi: &gpu_cu_11_2_small_multi
  resource_class: gpu.nvidia.small.multi
  machine:
    image: ubuntu-2004-cuda-11.2:202103-01
  # Exported to every step of a job that merges this executor.
  environment:
    CUDA_HOME: /usr/local/cuda-11.2
    CUDA_VERSION: "11.2"
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
56

Anupam Bhatnagar's avatar
Anupam Bhatnagar committed
57
# GPU executor: CUDA 11.2 machine image on the gpu.nvidia.medium.multi class.
gpu_cu_11_2_medium_multi: &gpu_cu_11_2_medium_multi
  resource_class: gpu.nvidia.medium.multi
  machine:
    image: ubuntu-2004-cuda-11.2:202103-01
  # Exported to every step of a job that merges this executor.
  environment:
    CUDA_HOME: /usr/local/cuda-11.2
    CUDA_VERSION: "11.2"
64

Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
65
66
67
68
69
70
71
# -------------------------------------------------------------------------------------
# Re-usable commands
# -------------------------------------------------------------------------------------
# Step: create a Python virtual environment in ~/venv and make it the active
# environment for the rest of the job.
setup_venv: &setup_venv
  - run:
      name: Setup Virtual Env
      # To debug commands that run early from .bashrc, use `/bin/bash -x` here.
      shell: /bin/bash
      working_directory: ~/
      command: |
        set -eo pipefail
        python -m venv ~/venv
        # Record the activation in $BASH_ENV so later steps pick up the venv too.
        echo ". ~/venv/bin/activate" >> $BASH_ENV
        . ~/venv/bin/activate
        # Print what got activated, for the build log.
        python --version
        which python
        which pip
        pip install --upgrade pip

Anupam Bhatnagar's avatar
Anupam Bhatnagar committed
85
# most recent LTS version
86
# Step: install torch 1.8.2 (LTS) plus the dev and benchmark requirements,
# unless a restored venv cache already provides them.
install_dep_1_8_2: &install_dep_1_8_2
  - run:
      name: Install Dependencies with torch 1.8.2 (LTS)
      command: |
        # A correctly restored venv cache (/home/circleci/venv) contains
        # check_version.py; if it succeeds for "torch eq 1.8", skip the install.
        checker=/home/circleci/venv/check_version.py
        if [ -f "$checker" ]; then
          python "$checker" torch eq 1.8 && exit 0
        fi
        # No usable cache: install everything.
        pip install --progress-bar off torch==1.8.2+cu102 torchvision==0.9.2+cu102 -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html
        pip install --progress-bar off -r requirements-dev.txt
        pip install --progress-bar off -r requirements-benchmarks.txt
        # Report and verify the torch version that ended up installed.
        python -c 'import torch; print("Torch version:", torch.__version__)'
        python -c 'import torch; assert torch.__version__.split(".")[:2] == ["1", "8"], "wrong torch version"'
        python -m torch.utils.collect_env
        # Store the checker inside the venv so it is present in the venv directory.
        wget -O "$checker" https://raw.githubusercontent.com/min-xu-ai/check_verion/main/check_version.py
100

Anupam Bhatnagar's avatar
Anupam Bhatnagar committed
101
# most recent stable version
102
# Step: install torch 1.10.2 plus the dev and benchmark requirements,
# unless a restored venv cache already provides them.
install_dep_1_10_2: &install_dep_1_10_2
  - run:
      name: Install Dependencies with torch 1.10.2
      command: |
        # A correctly restored venv cache (/home/circleci/venv) contains
        # check_version.py; if it succeeds for "torch eq 1.10", skip the install.
        checker=/home/circleci/venv/check_version.py
        if [ -f "$checker" ]; then
          python "$checker" torch eq 1.10 && exit 0
        fi
        # No usable cache: install everything.
        pip install --progress-bar off torch==1.10.2+cu113 torchvision==0.11.3+cu113 -f https://download.pytorch.org/whl/torch_stable.html
        pip install --progress-bar off -r requirements-dev.txt
        pip install --progress-bar off -r requirements-benchmarks.txt
        # Report and verify the torch version that ended up installed.
        python -c 'import torch; print("Torch version:", torch.__version__)'
        python -c 'import torch; assert torch.__version__.split(".")[:2] == ["1", "10"], "wrong torch version"'
        python -m torch.utils.collect_env
        # Store the checker inside the venv so it is present in the venv directory.
        wget -O "$checker" https://raw.githubusercontent.com/min-xu-ai/check_verion/main/check_version.py

117
118
119
120
121
# Step: install a pinned torch nightly (1.12.0.dev20220424) plus the dev and
# benchmark requirements, unless a restored venv cache already provides them.
install_dep_pytorch_nightly: &install_dep_pytorch_nightly
  - run:
      name: Install Dependencies with a torch nightly preview build
      command: |
        # A correctly restored venv cache (/home/circleci/venv) contains
        # check_version.py; if it succeeds for "torch eq 1.12", skip the install.
        checker=/home/circleci/venv/check_version.py
        if [ -f "$checker" ]; then
          python "$checker" torch eq 1.12 && exit 0
        fi
        # No usable cache: install everything.
        pip install --progress-bar off --pre torch==1.12.0.dev20220424+cu113 torchvision==0.13.0.dev20220424+cu113 -f https://download.pytorch.org/whl/nightly/cu113/torch_nightly.html
        pip install --progress-bar off -r requirements-dev.txt
        pip install --progress-bar off -r requirements-benchmarks.txt
        # Report and verify the torch version that ended up installed.
        python -c 'import torch; print("Torch version:", torch.__version__)'
        python -c 'import torch; assert torch.__version__.split(".")[:2] == ["1", "12"], "wrong torch version"'
        python -m torch.utils.collect_env
        # Store the checker inside the venv so it is present in the venv directory.
        wget -O "$checker" https://raw.githubusercontent.com/min-xu-ai/check_verion/main/check_version.py

132
# Step: pip-install the checked-out repository and smoke-test the import.
install_repo: &install_repo
  - run:
      name: Install Repository
      command: |
        pip install .
        # Import test: the first sys.path entry is dropped before importing
        # fairscale.
        python -c 'import sys; sys.path = sys.path[1:]; import fairscale'
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
139

140
141
142
143
144
145
# Step: run the repository script that validates the unit test list files.
check_test_list: &check_test_list
  - run:
      name: Verify that unit test list files are correct
      command: bash ./tests/ci_test_list_check.sh

146
147
148
149
# Step: hand coverage.xml to the codecov orb's upload command.
upload_coverage: &upload_coverage
  - codecov/upload:
      token: $CODECOV_TOKEN
      file: coverage.xml
msbaines's avatar
msbaines committed
150

151
152
153
154
# Step: offload benchmark with activation checkpointing enabled.
run_offload_benchmark: &run_offload_benchmark
  - run:
      name: Run Offload Benchmark
      command: python benchmarks/experimental/offload.py --checkpoint_activation
156

157
158
159
160
161
162
# Step: FSDP benchmark on synthetic data.
run_fsdp_benchmark: &run_fsdp_benchmark
  - run:
      name: Run FSDP Benchmark
      command: python benchmarks/fsdp.py --use_synthetic_data

Jun Ru Anderson's avatar
Jun Ru Anderson committed
163
# Step: pipe benchmark with its default arguments.
run_pipe_benchmark: &run_pipe_benchmark
  - run:
      name: Run Pipe Benchmark
      command: python benchmarks/pipe.py
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
168

169
170
171
172
# Step: OSS benchmark on 4 processes, then a second run with the script's
# --check_regression flag.
run_oss_benchmark: &run_oss_benchmark
  - run:
      name: Run OSS Benchmark
      command: |
        # Short run with the default optimizer type.
        python benchmarks/oss.py --world_size 4 --epochs 2
        # Longer oss_sharded_ddp run with --check_regression.
        python benchmarks/oss.py --check_regression --world_size 4 --optim_type oss_sharded_ddp --epochs 12
175
176

# Step: OSS benchmark with the --gloo flag, once per optimizer type.
run_oss_gloo: &run_oss_gloo
  - run:
      name: Run OSS with Gloo
      command: |
        # oss_ddp first, then oss_sharded_ddp; two epochs each.
        python benchmarks/oss.py --gloo --optim_type oss_ddp --epochs 2
        python benchmarks/oss.py --gloo --optim_type oss_sharded_ddp --epochs 2

183
# Step: OSS benchmark with the --amp flag (oss_sharded_ddp, 3 epochs).
run_oss_amp: &run_oss_amp
  - run:
      name: Run OSS with Torch AMP
      command: python benchmarks/oss.py --amp --epochs 3 --optim_type oss_sharded_ddp

189
190
191
192
193
194
# Step: OSS benchmark with --amp and --multi_tensor_optim
# (oss_sharded_ddp, 3 epochs).
run_oss_for_each: &run_oss_for_each
  - run:
      # Step name as shown in the CircleCI UI ("optmizer" typo corrected).
      name: Run OSS with Torch AMP and ForEach optimizer
      command: |
        python benchmarks/oss.py --amp --epochs 3 --optim_type oss_sharded_ddp --multi_tensor_optim

195
196
197
198
199
200
201
202
203
# Step: build the docs and fail when the end of the build output mentions a
# warning.
run_doc_build: &run_doc_build
  - run:
      name: Testing doc build
      command: |
        cd docs
        pip install --progress-bar off -r requirements.txt
        make help
        # Keep a copy of the build output for the check below.
        make singlehtml | tee make.out
        # `tail` looks only at the last lines of make.out; the leading `!`
        # turns a grep match into a failing step.
        ! tail make.out | grep -q warning
204

205
206
207
208
209
210
# This is an alias to run all unit tests possible on a platform.
run_unittests: &run_unittests
  - run:
      name: Run all unit tests.
      # On CPU the whole suite runs without stopping at the first failure,
      # because docker time is cheaper.
      command: |
        # Raise the open-file limit before starting pytest.
        ulimit -n 10000
        pytest --junitxml=test-results/junit.xml --verbose --timeout 60 --cov-report=xml --cov=./
213

Min Xu's avatar
Min Xu committed
214
commands:
215
216
217
218

   # This is a command (like a function) that runs the tests listed in a given test_list_file.
   # If test_list_file is not given, this results in an error.
   run_unittests_from_list:
     parameters:
       # Path of a text file naming the tests to run; its content is passed to
       # pytest as arguments.
       test_list_file:
         type: string
         default: "/dev/non_exist"  # Default to error out
     steps:
       - run:
           name: Run Unit Tests
           command: |
             # Raise the open-file limit before starting pytest.
             ulimit -n 10000
             # Fail with an explicit message when the list file is missing
             # (this is also what happens when the parameter is left at its default).
             if [ ! -f "<<parameters.test_list_file>>" ]; then
               echo "test list file not found: <<parameters.test_list_file>>" >&2
               exit 1
             fi
             # The command substitution is left unquoted on purpose: each entry
             # of the list file must become a separate pytest argument.
             pytest --junitxml=test-results/junit.xml --verbose --timeout 70 --cov-report=xml --cov=./ $(cat "<<parameters.test_list_file>>")
Min Xu's avatar
Min Xu committed
230

231
232
233
234
235
236
237
   setup_pyenv:
     parameters:
       # Python version handed to `pyenv install` and `pyenv global`.
       version:
         type: string
     steps:
       - run:
           name: Setup pyenv
           # pyenv used to be updated with:
           #   git clone https://github.com/pyenv/pyenv-update.git $(pyenv root)/plugins/pyenv-update
           #   pyenv update
           # That was not deterministic because pyenv itself kept changing, so
           # it is now pinned to a fixed tag. (v2.3.0 is broken: it makes bash
           # fail when it runs "eval $(pyenv init -)".)
           command: |
             # Check out the pinned pyenv release.
             cd /opt/circleci/.pyenv/
             git remote update
             git checkout v2.2.0
             # Force-install the requested Python and make it the global default.
             pyenv install -f <<parameters.version>>
             pyenv global <<parameters.version>>

Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
250
251
252
253
254
# -------------------------------------------------------------------------------------
# Jobs to run
# -------------------------------------------------------------------------------------

jobs:
255
256
  # CPU job (Python 3.7 executor): check the test lists, install the
  # torch 1.10.2 dependency set, run all unit tests and build the docs.
  cpu_tests_py37:
    <<: *cpu_py37

    working_directory: ~/fairscale

    steps:
      - checkout
      - <<: *check_test_list
      - <<: *setup_venv

      # Cache the venv directory that contains dependencies. The key names the
      # torch version that is installed and checksums every file the install
      # step reads, so that changing any of them starts a fresh cache.
      - restore_cache:
          keys:
            - cache-key-cpu-py37-torch-1-10-2-{{ .Environment.CACHE_VERSION }}-{{ checksum "setup.py" }}-{{ checksum "requirements-dev.txt" }}-{{ checksum "requirements-benchmarks.txt" }}

      - <<: *install_dep_1_10_2

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-cpu-py37-torch-1-10-2-{{ .Environment.CACHE_VERSION }}-{{ checksum "setup.py" }}-{{ checksum "requirements-dev.txt" }}-{{ checksum "requirements-benchmarks.txt" }}

      - <<: *install_repo
      - <<: *run_unittests
      - <<: *run_doc_build

      - store_test_results:
          path: test-results
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
283

284
285
  # CPU job (Python 3.8 executor): check the test lists, install the
  # torch 1.10.2 dependency set, run all unit tests and build the docs.
  cpu_tests_py38:
    <<: *cpu_py38

    working_directory: ~/fairscale

    steps:
      - checkout
      - <<: *check_test_list
      - <<: *setup_venv

      # Cache the venv directory that contains dependencies. The key names the
      # torch version that is installed and checksums every file the install
      # step reads, so that changing any of them starts a fresh cache.
      - restore_cache:
          keys:
            - cache-key-cpu-py38-torch-1-10-2-{{ .Environment.CACHE_VERSION }}-{{ checksum "setup.py" }}-{{ checksum "requirements-dev.txt" }}-{{ checksum "requirements-benchmarks.txt" }}

      - <<: *install_dep_1_10_2

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-cpu-py38-torch-1-10-2-{{ .Environment.CACHE_VERSION }}-{{ checksum "setup.py" }}-{{ checksum "requirements-dev.txt" }}-{{ checksum "requirements-benchmarks.txt" }}

      - <<: *install_repo
      - <<: *run_unittests
      - <<: *run_doc_build

      - store_test_results:
          path: test-results

312
313
314
315
316
317
318
  # CPU job (Python 3.9 executor): check the test lists, install the
  # torch 1.10.2 dependency set, run all unit tests and build the docs.
  cpu_tests_py39:
    <<: *cpu_py39

    working_directory: ~/fairscale

    steps:
      - checkout
      - <<: *check_test_list
      - <<: *setup_venv

      # Cache the venv directory that contains dependencies. The key names the
      # torch version that is installed and checksums every file the install
      # step reads, so that changing any of them starts a fresh cache.
      - restore_cache:
          keys:
            - cache-key-cpu-py39-torch-1-10-2-{{ .Environment.CACHE_VERSION }}-{{ checksum "setup.py" }}-{{ checksum "requirements-dev.txt" }}-{{ checksum "requirements-benchmarks.txt" }}

      - <<: *install_dep_1_10_2

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-cpu-py39-torch-1-10-2-{{ .Environment.CACHE_VERSION }}-{{ checksum "setup.py" }}-{{ checksum "requirements-dev.txt" }}-{{ checksum "requirements-benchmarks.txt" }}

      - <<: *install_repo
      - <<: *run_unittests
      - <<: *run_doc_build

      - store_test_results:
          path: test-results

Anupam Bhatnagar's avatar
Anupam Bhatnagar committed
341
  # GPU job: run the tests named in `test_list_file` against the torch 1.8.2
  # (LTS) dependency set, then upload coverage. The job keeps its historical
  # name because the workflow refers to it.
  gpu_tests_1_8_1:
    parameters:
      # Passed through to run_unittests_from_list; the default makes that
      # command fail, so callers must supply a real list file.
      test_list_file:
        type: string
        default: "/dev/non_exist"

    <<: *gpu_cu_11_2_small_multi

    working_directory: ~/fairscale

    steps:
      - checkout

      - run: nvidia-smi

      # Install Python 3.9.7 with pyenv and make it the global interpreter.
      - setup_pyenv:
          version: "3.9.7"

      - <<: *setup_venv

      # Cache the venv directory that contains dependencies. The key names the
      # torch version that is installed and checksums every file the install
      # step reads, so that changing any of them starts a fresh cache.
      - restore_cache:
          keys:
            - cache-key-py-3-9-7-gpu-torch-1-8-2-cuda-11-2-{{ .Environment.CACHE_VERSION }}-{{ checksum "setup.py" }}-{{ checksum "requirements-dev.txt" }}-{{ checksum "requirements-benchmarks.txt" }}

      - <<: *install_dep_1_8_2

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-py-3-9-7-gpu-torch-1-8-2-cuda-11-2-{{ .Environment.CACHE_VERSION }}-{{ checksum "setup.py" }}-{{ checksum "requirements-dev.txt" }}-{{ checksum "requirements-benchmarks.txt" }}

      - <<: *install_repo

      - run_unittests_from_list:
          test_list_file: <<parameters.test_list_file>>

      - store_test_results:
          path: test-results

      - <<: *upload_coverage
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
383

Anupam Bhatnagar's avatar
Anupam Bhatnagar committed
384
  # GPU job: run the tests named in `test_list_file` against the torch 1.10.2
  # dependency set. The job keeps its historical name because the workflow
  # refers to it.
  gpu_tests_1_10_0:
    parameters:
      # Passed through to run_unittests_from_list; the default makes that
      # command fail, so callers must supply a real list file.
      test_list_file:
        type: string
        default: "/dev/non_exist"

    <<: *gpu_cu_11_2_small_multi

    working_directory: ~/fairscale

    steps:
      - checkout

      - run: nvidia-smi

      # Install Python 3.9.7 with pyenv and make it the global interpreter.
      - setup_pyenv:
          version: "3.9.7"

      - <<: *setup_venv

      # Cache the venv directory that contains dependencies. The key names the
      # torch version that is installed and checksums every file the install
      # step reads, so that changing any of them starts a fresh cache.
      - restore_cache:
          keys:
            - cache-key-py-3-9-7-gpu-torch-1-10-2-cuda-11-2-{{ .Environment.CACHE_VERSION }}-{{ checksum "setup.py" }}-{{ checksum "requirements-dev.txt" }}-{{ checksum "requirements-benchmarks.txt" }}

      - <<: *install_dep_1_10_2

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-py-3-9-7-gpu-torch-1-10-2-cuda-11-2-{{ .Environment.CACHE_VERSION }}-{{ checksum "setup.py" }}-{{ checksum "requirements-dev.txt" }}-{{ checksum "requirements-benchmarks.txt" }}

      - <<: *install_repo

      - run_unittests_from_list:
          test_list_file: <<parameters.test_list_file>>

      - store_test_results:
          path: test-results
424

425
  # GPU job: run the tests named in `test_list_file` against the pinned torch
  # nightly dependency set, on the medium multi-GPU executor.
  gpu_tests_pytorch_nightly:
    parameters:
      # Passed through to run_unittests_from_list; the default makes that
      # command fail, so callers must supply a real list file.
      test_list_file:
        type: string
        default: "/dev/non_exist"

    <<: *gpu_cu_11_2_medium_multi

    working_directory: ~/fairscale

    steps:
      - checkout

      - run: nvidia-smi

      # Install Python 3.9.7 with pyenv and make it the global interpreter.
      - setup_pyenv:
          version: "3.9.7"

      - <<: *setup_venv

      # Cache the venv directory that contains dependencies. The key checksums
      # every file the install step reads, so that changing any of them starts
      # a fresh cache.
      - restore_cache:
          keys:
            - cache-key-py-3-9-7-gpu-pytorch-nightly-cuda-11-2-{{ .Environment.CACHE_VERSION }}-{{ checksum "setup.py" }}-{{ checksum "requirements-dev.txt" }}-{{ checksum "requirements-benchmarks.txt" }}

      - <<: *install_dep_pytorch_nightly

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-py-3-9-7-gpu-pytorch-nightly-cuda-11-2-{{ .Environment.CACHE_VERSION }}-{{ checksum "setup.py" }}-{{ checksum "requirements-dev.txt" }}-{{ checksum "requirements-benchmarks.txt" }}

      - <<: *install_repo

      - run_unittests_from_list:
          test_list_file: <<parameters.test_list_file>>

      - store_test_results:
          path: test-results

466
  # Benchmark job on the small multi-GPU executor: pipe, offload, OSS
  # (AMP, ForEach, Gloo) and FSDP benchmarks with the torch 1.10.2 set.
  benchmarks_1:
    <<: *gpu_cu_11_2_small_multi

    working_directory: ~/fairscale

    steps:
      - checkout

      - run: nvidia-smi

      # Install Python 3.9.7 with pyenv and make it the global interpreter.
      - setup_pyenv:
          version: "3.9.7"

      - <<: *setup_venv

      # Cache the venv directory that contains dependencies. The key names the
      # torch version that is installed and checksums every file the install
      # step reads, so that changing any of them starts a fresh cache.
      - restore_cache:
          keys:
            - cache-key-py-3-9-7-benchmarks-torch-1-10-2-cuda-11-2-{{ .Environment.CACHE_VERSION }}-{{ checksum "setup.py" }}-{{ checksum "requirements-dev.txt" }}-{{ checksum "requirements-benchmarks.txt" }}

      # Cache the MNIST directory that contains benchmark data
      - restore_cache:
          keys:
            - cache-key-benchmark-MNIST-{{ .Environment.CACHE_VERSION }}-{{ checksum "benchmarks/datasets/mnist.py" }}

      - <<: *install_dep_1_10_2

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-py-3-9-7-benchmarks-torch-1-10-2-cuda-11-2-{{ .Environment.CACHE_VERSION }}-{{ checksum "setup.py" }}-{{ checksum "requirements-dev.txt" }}-{{ checksum "requirements-benchmarks.txt" }}

      - <<: *install_repo

      - <<: *run_pipe_benchmark

      - <<: *run_offload_benchmark

      - <<: *run_oss_amp

      - <<: *run_oss_for_each

      - <<: *run_oss_gloo

      - <<: *run_fsdp_benchmark

      # Saved last, after the benchmarks have run.
      - save_cache:
          paths:
            - /tmp/MNIST
          key: cache-key-benchmark-MNIST-{{ .Environment.CACHE_VERSION }}-{{ checksum "benchmarks/datasets/mnist.py" }}
516

517
  # Benchmark job on the medium multi-GPU executor: the OSS benchmark
  # (including its regression check) with the torch 1.10.2 set.
  benchmarks_2:
    <<: *gpu_cu_11_2_medium_multi

    working_directory: ~/fairscale

    steps:
      - checkout

      - run: nvidia-smi

      # Install Python 3.9.7 with pyenv and make it the global interpreter.
      - setup_pyenv:
          version: "3.9.7"

      - <<: *setup_venv

      # Cache the venv directory that contains dependencies. The key names the
      # torch version that is installed and checksums every file the install
      # step reads, so that changing any of them starts a fresh cache.
      - restore_cache:
          keys:
            - cache-key-py-3-9-7-benchmarks-torch-1-10-2-cuda-11-2-{{ .Environment.CACHE_VERSION }}-{{ checksum "setup.py" }}-{{ checksum "requirements-dev.txt" }}-{{ checksum "requirements-benchmarks.txt" }}

      # Cache the MNIST directory that contains benchmark data
      - restore_cache:
          keys:
            - cache-key-benchmark-MNIST-{{ .Environment.CACHE_VERSION }}-{{ checksum "benchmarks/datasets/mnist.py" }}

      - <<: *install_dep_1_10_2

      - save_cache:
          paths:
            - ~/venv
          key: cache-key-py-3-9-7-benchmarks-torch-1-10-2-cuda-11-2-{{ .Environment.CACHE_VERSION }}-{{ checksum "setup.py" }}-{{ checksum "requirements-dev.txt" }}-{{ checksum "requirements-benchmarks.txt" }}

      - <<: *install_repo

      - <<: *run_oss_benchmark

      # Saved last, after the benchmark has run.
      - save_cache:
          paths:
            - /tmp/MNIST
          key: cache-key-benchmark-MNIST-{{ .Environment.CACHE_VERSION }}-{{ checksum "benchmarks/datasets/mnist.py" }}
558

Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
559
560
561
562
563

# Single workflow: every job below is listed without dependencies on the others.
workflows:
  version: 2
  build:
    jobs:
      # CPU suites, one per Python version.
      - cpu_tests_py37
      - cpu_tests_py38
      - cpu_tests_py39
      # GPU suites on test list 1.
      - gpu_tests_1_8_1:
          test_list_file: tests/ci_test_list_1.txt
      - gpu_tests_1_10_0:
          test_list_file: tests/ci_test_list_1.txt
      - gpu_tests_pytorch_nightly:
          test_list_file: tests/ci_test_list_1.txt
      # GPU suites on test list 2.
      - gpu_tests_1_8_1:
          test_list_file: tests/ci_test_list_2.txt
      - gpu_tests_1_10_0:
          test_list_file: tests/ci_test_list_2.txt
      - gpu_tests_pytorch_nightly:
          test_list_file: tests/ci_test_list_2.txt
      # GPU suites on test list 3.
      - gpu_tests_1_8_1:
          test_list_file: tests/ci_test_list_3.txt
      - gpu_tests_1_10_0:
          test_list_file: tests/ci_test_list_3.txt
      - gpu_tests_pytorch_nightly:
          test_list_file: tests/ci_test_list_3.txt
      # Benchmarks.
      - benchmarks_1
      - benchmarks_2