config.yml 18.3 KB
Newer Older
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
1
2
3
4
5
# Python CircleCI 2.0 configuration file
#
# Check https://circleci.com/docs/2.0/language-python/ for more details
#
# Adapted from
6
# https://github.com/facebookresearch/detectron2/blob/main/.circleci/config.yml
7
8
#
# Pro tip: install the CircleCI CLI to validate the config locally during development.
9
10
#
# To reset/clean the cache update the CACHE_VERSION variable in project settings
Min Xu's avatar
Min Xu committed
11
# in the fairscale project in CircleCI's web UI. The CACHE_VERSION follows the convention
12
13
14
15
16
# v${FAIRSCALE_VERSION}-${CACHE_NUMBER}, e.g. v0.4.2-1. CACHE_NUMBER must start
# at 1 and increase in whole numbers. When changing the CACHE_VERSION manually
# always set the FAIRSCALE_VERSION value to the fairscale version being tested.
# To reset the cache when not updating the fairscale version, only update the
# CACHE_NUMBER value.
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
17

Min Xu's avatar
Min Xu committed
18
version: 2.1
19
20
orbs:
  codecov: codecov/codecov@1.0.2
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
21
22
23
# -------------------------------------------------------------------------------------
# Environments to run the jobs in
# -------------------------------------------------------------------------------------
24
25
26
cpu_py38: &cpu_py38
  docker:
    - image: circleci/python:3.8
27
  resource_class: large
28
29
30
31

cpu_py39: &cpu_py39
  docker:
    - image: circleci/python:3.9
32
  resource_class: large
33

Min Xu's avatar
Min Xu committed
34
35
36
37
38
# CPU environment: the circleci/python:3.10 Docker image on the "large" resource class.
# Merged into jobs via the &cpu_py310 anchor.
cpu_py310: &cpu_py310
  docker:
    - image: circleci/python:3.10
  resource_class: large

39
# Here is the list of GPU images:
Anupam Bhatnagar's avatar
Anupam Bhatnagar committed
40
# https://circleci.com/docs/2.0/configuration-reference/#available-linux-gpu-images
41
42
# We need to use multiple gpus for several jobs. The resource_class
# values are available here T101565170
Anupam Bhatnagar's avatar
Anupam Bhatnagar committed
43
44
45
46
# gpu.nvidia.small.multi = 2 gpus with 16 GB ram each
# gpu.nvidia.medium.multi = 4 gpus with 16 GB ram each

gpu_cu_11_2_small_multi: &gpu_cu_11_2_small_multi
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
47
  environment:
Anupam Bhatnagar's avatar
Anupam Bhatnagar committed
48
49
    CUDA_VERSION: "11.2"
    CUDA_HOME: /usr/local/cuda-11.2
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
50
  machine:
Anupam Bhatnagar's avatar
Anupam Bhatnagar committed
51
52
    image: ubuntu-2004-cuda-11.2:202103-01
  resource_class: gpu.nvidia.small.multi
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
53

Anupam Bhatnagar's avatar
Anupam Bhatnagar committed
54
gpu_cu_11_2_medium_multi: &gpu_cu_11_2_medium_multi
55
  environment:
56
57
    CUDA_VERSION: "11.2"
    CUDA_HOME: /usr/local/cuda-11.2
58
  machine:
59
    image: ubuntu-2004-cuda-11.2:202103-01
Anupam Bhatnagar's avatar
Anupam Bhatnagar committed
60
  resource_class: gpu.nvidia.medium.multi
61

Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
62
63
64
65
66
67
68
# -------------------------------------------------------------------------------------
# Re-usable commands
# -------------------------------------------------------------------------------------
setup_venv: &setup_venv
  - run:
      name: Setup Virtual Env
      working_directory: ~/
69
70
      # Use "bash -x" to debug early commands executed in .bashrc.
      shell: /bin/bash
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
71
      command: |
72
73
        set -e
        set -o pipefail
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
74
75
76
77
78
79
80
81
        python -m venv ~/venv
        echo ". ~/venv/bin/activate" >> $BASH_ENV
        . ~/venv/bin/activate
        python --version
        which python
        which pip
        pip install --upgrade pip

Anupam Bhatnagar's avatar
Anupam Bhatnagar committed
82
# most recent LTS version
83
install_dep_1_8_2: &install_dep_1_8_2
84
  - run:
85
      name: Install Dependencies with torch 1.8.2 (LTS)
86
      command: |
87
        # check if we have restored venv cache (/home/circleci/venv) correctly, if so, just skip
Min Xu's avatar
Min Xu committed
88
        if [ -f /home/circleci/venv/check_version.py ]; then python /home/circleci/venv/check_version.py torch eq 1.8 && exit 0; fi
89
        # start installing
90
        pip install --progress-bar off torch==1.8.2+cu102 torchvision==0.9.2+cu102 -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html
91
        pip install --progress-bar off -r requirements-dev.txt
92
        pip install --progress-bar off -r requirements-benchmarks.txt
93
94
95
        python -c 'import torch; print("Torch version:", torch.__version__)'
        python -c 'import torch; assert torch.__version__.split(".")[:2] == ["1", "8"], "wrong torch version"'
        python -m torch.utils.collect_env
Min Xu's avatar
Min Xu committed
96
        wget -O /home/circleci/venv/check_version.py https://raw.githubusercontent.com/min-xu-ai/check_verion/main/check_version.py
97

Anupam Bhatnagar's avatar
Anupam Bhatnagar committed
98
# most recent stable version
Min Xu's avatar
Min Xu committed
99
install_dep_1_11_0: &install_dep_1_11_0
100
  - run:
Min Xu's avatar
Min Xu committed
101
      name: Install Dependencies with torch 1.11.0
102
103
      command: |
        # check if we have restored venv cache (/home/circleci/venv) correctly, if so, just skip
Min Xu's avatar
Min Xu committed
104
        if [ -f /home/circleci/venv/check_version.py ]; then python /home/circleci/venv/check_version.py torch eq 1.11 && exit 0; fi
105
        # start installing
Min Xu's avatar
Min Xu committed
106
        pip install --progress-bar off torch==1.11.0+cu115 torchvision==0.12.0+cu115 -f https://download.pytorch.org/whl/torch_stable.html
107
        pip install --progress-bar off -r requirements-dev.txt
108
109
        pip install --progress-bar off -r requirements-benchmarks.txt
        python -c 'import torch; print("Torch version:", torch.__version__)'
Min Xu's avatar
Min Xu committed
110
        python -c 'import torch; assert torch.__version__.split(".")[:2] == ["1", "11"], "wrong torch version"'
111
112
113
        python -m torch.utils.collect_env
        wget -O /home/circleci/venv/check_version.py https://raw.githubusercontent.com/min-xu-ai/check_verion/main/check_version.py

114
115
116
117
118
install_dep_pytorch_nightly: &install_dep_pytorch_nightly
  - run:
      name: Install Dependencies with a torch nightly preview build
      command: |
        # check if we have restored venv cache (/home/circleci/venv) correctly, if so, just skip
119
        if [ -f /home/circleci/venv/check_version.py ]; then python /home/circleci/venv/check_version.py torch eq 1.12 && exit 0; fi
120
        # start installing
Min Xu's avatar
Min Xu committed
121
        pip install --progress-bar off --pre torch==1.12.0.dev20220424+cu113 torchvision==0.13.0.dev20220424+cu113 -f https://download.pytorch.org/whl/nightly/cu113/torch_nightly.html
122
        pip install --progress-bar off -r requirements-dev.txt
123
124
        pip install --progress-bar off -r requirements-benchmarks.txt
        python -c 'import torch; print("Torch version:", torch.__version__)'
125
        python -c 'import torch; assert torch.__version__.split(".")[:2] == ["1", "12"], "wrong torch version"'
126
127
128
        python -m torch.utils.collect_env
        wget -O /home/circleci/venv/check_version.py https://raw.githubusercontent.com/min-xu-ai/check_verion/main/check_version.py

129
install_repo: &install_repo
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
130
131
132
  - run:
      name: Install Repository
      command: |
133
134
135
        pip install .
        # Test import.
        python -c 'import sys; sys.path = sys.path[1:]; import fairscale'
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
136

137
138
139
140
141
142
# Re-usable step: runs tests/ci_test_list_check.sh with bash to verify the
# unit test list files.
check_test_list: &check_test_list
  - run:
      name: Verify that unit test list files are correct
      command: |
        bash ./tests/ci_test_list_check.sh

143
144
145
146
# Re-usable step: hands coverage.xml to the codecov orb's upload command,
# passing $CODECOV_TOKEN as the token.
upload_coverage: &upload_coverage
  - codecov/upload:
      token: $CODECOV_TOKEN
      file: coverage.xml
msbaines's avatar
msbaines committed
147

148
149
150
151
run_offload_benchmark: &run_offload_benchmark
  - run:
      name: Run Offload Benchmark
      command: |
152
        python benchmarks/experimental/offload.py --checkpoint_activation
153

154
155
156
157
158
159
# Re-usable step: runs benchmarks/fsdp.py with the --use_synthetic_data flag.
run_fsdp_benchmark: &run_fsdp_benchmark
  - run:
      name: Run FSDP Benchmark
      command: |
        python benchmarks/fsdp.py --use_synthetic_data

Jun Ru Anderson's avatar
Jun Ru Anderson committed
160
run_pipe_benchmark: &run_pipe_benchmark
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
161
  - run:
Jun Ru Anderson's avatar
Jun Ru Anderson committed
162
      name: Run Pipe Benchmark
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
163
      command: |
Jun Ru Anderson's avatar
Jun Ru Anderson committed
164
        python benchmarks/pipe.py
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
165

166
167
168
169
run_oss_benchmark: &run_oss_benchmark
  - run:
      name: Run OSS Benchmark
      command: |
170
        python benchmarks/oss.py --world_size 4 --epochs 2
171
        python benchmarks/oss.py --check_regression --world_size 4 --optim_type oss_sharded_ddp --epochs 12
172
173

run_oss_gloo: &run_oss_gloo
174
175
176
177
178
179
  - run:
      name: Run OSS with Gloo
      command: |
        python benchmarks/oss.py --gloo --optim_type oss_ddp --epochs 2
        python benchmarks/oss.py --gloo --optim_type oss_sharded_ddp --epochs 2

180
run_oss_amp: &run_oss_amp
181
182
183
184
185
   - run:
       name: Run OSS with Torch AMP
       command: |
         python benchmarks/oss.py --amp --epochs 3 --optim_type oss_sharded_ddp

186
187
188
189
190
191
# Re-usable step: runs benchmarks/oss.py with --amp and --multi_tensor_optim
# for the oss_sharded_ddp optimizer type.
run_oss_for_each: &run_oss_for_each
  - run:
      name: Run OSS with Torch AMP and ForEach optimizer
      command: |
        python benchmarks/oss.py --amp --epochs 3 --optim_type oss_sharded_ddp --multi_tensor_optim

192
193
194
195
196
197
198
199
200
# Re-usable step: installs the docs requirements, builds the docs with
# "make singlehtml", and fails when the tail of the build log mentions a warning.
run_doc_build: &run_doc_build
  - run:
      name: Testing doc build
      command: |
        cd docs
        pip install --progress-bar off -r requirements.txt
        make help
        # Keep a copy of the build output so its tail can be inspected below.
        make singlehtml | tee make.out
        # Fail explicitly instead of relying on a trailing "! pipeline": bash
        # exempts "!"-negated pipelines from "set -e", so that form only fails
        # the step while it happens to be the very last command.
        if tail make.out | grep -q warning; then
          echo "Doc build reported warnings; see the output above." >&2
          exit 1
        fi
201

202
203
204
205
206
207
# This is an alias to run all unit tests possible on a platform.
run_unittests: &run_unittests
   - run:
       name: Run all unit tests.
       # Run every test on CPU without stopping at the first failure, since docker time is cheaper.
       command: |
208
         ulimit -n 10000
209
         pytest --junitxml=test-results/junit.xml --verbose --timeout 60 --cov-report=xml --cov=./
210

Min Xu's avatar
Min Xu committed
211
commands:
212
213
214
215

   # This is a command (like a function) that runs tests from a given test_list_file.
   # If test_list_file is not given, this results in an error.
   run_unittests_from_list:
Min Xu's avatar
Min Xu committed
216
     parameters:
217
       test_list_file:
Min Xu's avatar
Min Xu committed
218
         type: string
219
         default: "/dev/non_exist"  # Default to error out
Min Xu's avatar
Min Xu committed
220
221
222
223
     steps:
       - run:
           name: Run Unit Tests
           command: |
224
             ulimit -n 10000
225
             if [ ! -f <<parameters.test_list_file>> ]; then exit 1; fi
Anupam Bhatnagar's avatar
Anupam Bhatnagar committed
226
             pytest --junitxml=test-results/junit.xml --verbose --timeout 70 --cov-report=xml --cov=./ `cat <<parameters.test_list_file>>`
Min Xu's avatar
Min Xu committed
227

228
229
230
231
232
233
234
   setup_pyenv:
     parameters:
       version:
         type: string
     steps:
       - run:
           name: Setup pyenv
235
236
237
238
239
           # We used to update pyenv with the following commands:
           #   git clone https://github.com/pyenv/pyenv-update.git $(pyenv root)/plugins/pyenv-update
           #   pyenv update
           # However, that is not deterministic because pyenv itself keeps changing.
           # pyenv is now pinned to a fixed version. (v2.3.0 is broken: it causes bash to fail when it tries to run "eval $(pyenv init -)".)
240
           command: |
241
242
243
             cd /opt/circleci/.pyenv/
             git remote update
             git checkout v2.2.0
244
245
246
             pyenv install -f <<parameters.version>>
             pyenv global <<parameters.version>>

Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
247
248
249
250
251
# -------------------------------------------------------------------------------------
# Jobs to run
# -------------------------------------------------------------------------------------

jobs:
Min Xu's avatar
Min Xu committed
252
253
  cpu_tests_py38:
    <<: *cpu_py38
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
254
255
256
257
258

    working_directory: ~/fairscale

    steps:
      - checkout
259
      - <<: *check_test_list
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
260
261
262
263
264
      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
Min Xu's avatar
Min Xu committed
265
266
            - cache-key-cpu-py38-torch-1-11-0-{{.Environment.CACHE_VERSION}}-{{checksum "setup.py"}}-{{checksum "requirements-dev.txt"}}
      - <<: *install_dep_1_11_0
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
267
268
269
270

      - save_cache:
          paths:
            - ~/venv
Min Xu's avatar
Min Xu committed
271
          key: cache-key-cpu-py38-torch-1-11-0-{{.Environment.CACHE_VERSION}}-{{checksum "setup.py"}}-{{checksum "requirements-dev.txt"}}
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
272

273
      - <<: *install_repo
274
      - <<: *run_unittests
275
276
277
278
      - <<: *run_doc_build

      - store_test_results:
          path: test-results
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
279

Min Xu's avatar
Min Xu committed
280
281
  cpu_tests_py39:
    <<: *cpu_py39
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
282

283
    working_directory: ~/fairscale
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
284

285
286
    steps:
      - checkout
287
      - <<: *check_test_list
288
      - <<: *setup_venv
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
289

290
291
292
      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
Min Xu's avatar
Min Xu committed
293
294
295
            - cache-key-cpu-py39-torch-1-11-0-{{.Environment.CACHE_VERSION}}-{{checksum "setup.py"}}-{{checksum "requirements-dev.txt"}}

      - <<: *install_dep_1_11_0
296
297
298
299

      - save_cache:
          paths:
            - ~/venv
Min Xu's avatar
Min Xu committed
300
          key: cache-key-cpu-py39-torch-1-11-0-{{.Environment.CACHE_VERSION}}-{{checksum "setup.py"}}-{{checksum "requirements-dev.txt"}}
301

302
      - <<: *install_repo
303
      - <<: *run_unittests
304
      - <<: *run_doc_build
305

Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
306
307
308
      - store_test_results:
          path: test-results

Min Xu's avatar
Min Xu committed
309
310
  cpu_tests_py310:
    <<: *cpu_py310
311
312
313
314
315

    working_directory: ~/fairscale

    steps:
      - checkout
316
      - <<: *check_test_list
317
318
319
320
321
      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
Min Xu's avatar
Min Xu committed
322
            - cache-key-cpu-py310-torch-1-11-0-{{.Environment.CACHE_VERSION}}-{{checksum "setup.py"}}-{{checksum "requirements-dev.txt"}}
323

Min Xu's avatar
Min Xu committed
324
      - <<: *install_dep_1_11_0
325
326
327
328

      - save_cache:
          paths:
            - ~/venv
Min Xu's avatar
Min Xu committed
329
          key: cache-key-cpu-py310-torch-1-11-0-{{.Environment.CACHE_VERSION}}-{{checksum "setup.py"}}-{{checksum "requirements-dev.txt"}}
330

331
      - <<: *install_repo
332
      - <<: *run_unittests
333
334
335
336
337
      - <<: *run_doc_build

      - store_test_results:
          path: test-results

Min Xu's avatar
Min Xu committed
338
  gpu_tests_lts:
Min Xu's avatar
Min Xu committed
339
    parameters:
340
      test_list_file:
Min Xu's avatar
Min Xu committed
341
        type: string
342
        default: "/dev/non_exist"
Min Xu's avatar
Min Xu committed
343

Anupam Bhatnagar's avatar
Anupam Bhatnagar committed
344
    <<: *gpu_cu_11_2_small_multi
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
345
346
347
348
349
350
351
352

    working_directory: ~/fairscale

    steps:
      - checkout

      - run: nvidia-smi

353
      # Run this to make sure we use python3 from the system.
354
      - setup_pyenv:
355
          version: 3.9.7
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
356
357
358
359
360
361

      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
Min Xu's avatar
Min Xu committed
362
            - cache-key-py-3-9-7-gpu-torch-1-8-2-{{.Environment.CACHE_VERSION}}-{{checksum "setup.py"}}-{{checksum "requirements-dev.txt"}}
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
363

364
      - <<: *install_dep_1_8_2
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
365
366
367
368

      - save_cache:
          paths:
            - ~/venv
Min Xu's avatar
Min Xu committed
369
          key: cache-key-py-3-9-7-gpu-torch-1-8-2-{{.Environment.CACHE_VERSION}}-{{checksum "setup.py"}}-{{checksum "requirements-dev.txt"}}
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
370

371
      - <<: *install_repo
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
372

373
374
      - run_unittests_from_list:
          test_list_file: <<parameters.test_list_file>>
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
375
376
377

      - store_test_results:
          path: test-results
378

379
      - <<: *upload_coverage
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
380

Min Xu's avatar
Min Xu committed
381
  gpu_tests_stable:
Min Xu's avatar
Min Xu committed
382
    parameters:
383
      test_list_file:
Min Xu's avatar
Min Xu committed
384
        type: string
385
        default: "/dev/non_exist"
Min Xu's avatar
Min Xu committed
386

Anupam Bhatnagar's avatar
Anupam Bhatnagar committed
387
    <<: *gpu_cu_11_2_small_multi
388
389
390
391
392
393
394
395

    working_directory: ~/fairscale

    steps:
      - checkout

      - run: nvidia-smi

396
      # Run this to make sure we use python3 from the system.
397
      - setup_pyenv:
398
          version: 3.9.7
399
400
401
402
403
404

      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
Min Xu's avatar
Min Xu committed
405
            - cache-key-py-3-9-7-gpu-torch-1-11-0-{{.Environment.CACHE_VERSION}}-{{checksum "setup.py"}}-{{checksum "requirements-dev.txt"}}
406

Min Xu's avatar
Min Xu committed
407
      - <<: *install_dep_1_11_0
408
409
410
411

      - save_cache:
          paths:
            - ~/venv
Min Xu's avatar
Min Xu committed
412
          key: cache-key-py-3-9-7-gpu-torch-1-11-0-{{.Environment.CACHE_VERSION}}-{{checksum "setup.py"}}-{{checksum "requirements-dev.txt"}}
413

414
      - <<: *install_repo
415

416
417
      - run_unittests_from_list:
          test_list_file: <<parameters.test_list_file>>
418
419
420

      - store_test_results:
          path: test-results
421

422
  gpu_tests_pytorch_nightly:
423
424
425
426
427
    parameters:
      test_list_file:
        type: string
        default: "/dev/non_exist"

Anupam Bhatnagar's avatar
Anupam Bhatnagar committed
428
    <<: *gpu_cu_11_2_medium_multi
429
430
431
432
433
434
435
436
437

    working_directory: ~/fairscale

    steps:
      - checkout

      - run: nvidia-smi

      # Run this to make sure we use python3 from the system.
438
      - setup_pyenv:
439
          version: 3.9.7
440
441
442
443
444
445

      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
Min Xu's avatar
Min Xu committed
446
            - cache-key-py-3-9-7-gpu-torch-1-12-0424-{{.Environment.CACHE_VERSION}}-{{checksum "setup.py"}}-{{checksum "requirements-dev.txt"}}
447

448
      - <<: *install_dep_pytorch_nightly
449
450
451
452

      - save_cache:
          paths:
            - ~/venv
Min Xu's avatar
Min Xu committed
453
          key: cache-key-py-3-9-7-gpu-torch-1-12-0424-{{.Environment.CACHE_VERSION}}-{{checksum "setup.py"}}-{{checksum "requirements-dev.txt"}}
454
455
456
457
458
459
460
461
462

      - <<: *install_repo

      - run_unittests_from_list:
          test_list_file: <<parameters.test_list_file>>

      - store_test_results:
          path: test-results

463
  benchmarks_1:
Anupam Bhatnagar's avatar
Anupam Bhatnagar committed
464
    <<: *gpu_cu_11_2_small_multi
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
465
466
467
468
469
470
471
472

    working_directory: ~/fairscale

    steps:
      - checkout

      - run: nvidia-smi

473
      - setup_pyenv:
474
          version: 3.9.7
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
475
476
477
478
479
480

      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
Min Xu's avatar
Min Xu committed
481
            - cache-key-py-3-9-7-benchmarks-torch-1-11-0-{{.Environment.CACHE_VERSION}}-{{checksum "setup.py"}}-{{checksum "requirements-dev.txt"}}
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
482

483
484
485
      # Cache the MNIST directory that contains benchmark data
      - restore_cache:
          keys:
Min Xu's avatar
Min Xu committed
486
            - cache-key-benchmark-MNIST-{{.Environment.CACHE_VERSION}}-{{checksum "benchmarks/datasets/mnist.py"}}
487

Min Xu's avatar
Min Xu committed
488
      - <<: *install_dep_1_11_0
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
489
490
491
492

      - save_cache:
          paths:
            - ~/venv
Min Xu's avatar
Min Xu committed
493
          key: cache-key-py-3-9-7-benchmarks-torch-1-11-0-{{.Environment.CACHE_VERSION}}-{{checksum "setup.py"}}-{{checksum "requirements-dev.txt"}}
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
494

495
      - <<: *install_repo
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
496

Jun Ru Anderson's avatar
Jun Ru Anderson committed
497
      - <<: *run_pipe_benchmark
Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
498

499
500
      - <<: *run_offload_benchmark

501
502
503
      - <<: *run_oss_amp

      - <<: *run_oss_for_each
504

505
506
      - <<: *run_oss_gloo

507
508
      - <<: *run_fsdp_benchmark

509
510
511
      - save_cache:
          paths:
            - /tmp/MNIST
Min Xu's avatar
Min Xu committed
512
          key: cache-key-benchmark-MNIST-{{.Environment.CACHE_VERSION}}-{{checksum "benchmarks/datasets/mnist.py"}}
513

514
  benchmarks_2:
Anupam Bhatnagar's avatar
Anupam Bhatnagar committed
515
    <<: *gpu_cu_11_2_medium_multi
516
517
518
519
520
521
522
523

    working_directory: ~/fairscale

    steps:
      - checkout

      - run: nvidia-smi

524
      - setup_pyenv:
525
          version: 3.9.7
526
527
528
529
530
531

      - <<: *setup_venv

      # Cache the venv directory that contains dependencies
      - restore_cache:
          keys:
Min Xu's avatar
Min Xu committed
532
            - cache-key-py-3-9-7-benchmarks-torch-1-11-0-{{.Environment.CACHE_VERSION}}-{{checksum "setup.py"}}-{{checksum "requirements-dev.txt"}}
533

534
535
536
537

      # Cache the MNIST directory that contains benchmark data
      - restore_cache:
          keys:
Min Xu's avatar
Min Xu committed
538
            - cache-key-benchmark-MNIST-{{.Environment.CACHE_VERSION}}-{{checksum "benchmarks/datasets/mnist.py"}}
539

Min Xu's avatar
Min Xu committed
540
      - <<: *install_dep_1_11_0
541
542
543
544

      - save_cache:
          paths:
            - ~/venv
Min Xu's avatar
Min Xu committed
545
          key: cache-key-py-3-9-7-benchmarks-torch-1-11-0-{{.Environment.CACHE_VERSION}}-{{checksum "setup.py"}}-{{checksum "requirements-dev.txt"}}
546

547
      - <<: *install_repo
548
549

      - <<: *run_oss_benchmark
550

551
552
553
      - save_cache:
          paths:
            - /tmp/MNIST
554
          key: cache-key-benchmark-MNIST-{{.Environment.CACHE_VERSION}}-{{checksum "benchmarks/datasets/mnist.py"}}
555

Mandeep Singh Baines's avatar
Mandeep Singh Baines committed
556
557
558
559
560

workflows:
  version: 2
  build:
    jobs:
561
562
      - cpu_tests_py38
      - cpu_tests_py39
Min Xu's avatar
Min Xu committed
563
564
      - cpu_tests_py310
      - gpu_tests_lts:
565
          test_list_file: tests/ci_test_list_1.txt
Min Xu's avatar
Min Xu committed
566
      - gpu_tests_stable:
567
          test_list_file: tests/ci_test_list_1.txt
568
569
      - gpu_tests_pytorch_nightly:
          test_list_file: tests/ci_test_list_1.txt
Min Xu's avatar
Min Xu committed
570
      - gpu_tests_lts:
571
          test_list_file: tests/ci_test_list_2.txt
Min Xu's avatar
Min Xu committed
572
      - gpu_tests_stable:
573
          test_list_file: tests/ci_test_list_2.txt
574
575
      - gpu_tests_pytorch_nightly:
          test_list_file: tests/ci_test_list_2.txt
Min Xu's avatar
Min Xu committed
576
      - gpu_tests_lts:
577
          test_list_file: tests/ci_test_list_3.txt
Min Xu's avatar
Min Xu committed
578
      - gpu_tests_stable:
579
          test_list_file: tests/ci_test_list_3.txt
580
581
      - gpu_tests_pytorch_nightly:
          test_list_file: tests/ci_test_list_3.txt
582
583
      - benchmarks_1
      - benchmarks_2