# CircleCI pipeline configuration. Uses the 2.1 syntax (orbs and reusable
# commands are declared below).
version: 2.1

# Orbs referenced by the TPU jobs (`go/install`, `gcp-gke/install`,
# `gcp-gke/update-kubeconfig-with-credentials`).
orbs:
    gcp-gke: circleci/gcp-gke@1.0.4
    go: circleci/go@1.3.0

commands:
  # Reusable step: halts the calling job early (reported as success) when the
  # diff between the pipeline's base revision and current revision only touches
  # files ending in .md or .rst.
  skip-job-on-doc-only-changes:
    description: "Do not continue this job and exit with success for PRs with only doc changes"
    steps:
      - run:
          name: docs-only changes skip check
          command: |
            # pipeline.git.base_revision is not always defined, so only proceed if all external vars are defined
            if test -n "<< pipeline.git.base_revision >>" && test -n "<< pipeline.git.revision >>" && test -n "$(git diff --name-only << pipeline.git.base_revision >>...<< pipeline.git.revision >>)"
            then
                # grep -v selects changed paths that do NOT end in .md/.rst; any such
                # path means real code changed and the job must run.
                if git diff --name-only << pipeline.git.base_revision >>...<< pipeline.git.revision >> | grep -Eqv '\.(md|rst)$'
                then
                    echo "Non-docs were modified in this PR, proceeding normally"
                else
                    echo "Only docs were modified in this PR, quitting this job"
                    circleci step halt
                fi
            else
                echo "Can't perform skipping check w/o base_revision defined, continuing the job"
            fi

# TPU REFERENCES
# Anchored step definitions; jobs pull them in with YAML aliases
# (e.g. `- *checkout_ml_testing`).
references:
    # Clones ml-testing-accelerators and checks out a pinned commit under the
    # local branch name `stable`.
    checkout_ml_testing: &checkout_ml_testing
        run:
            name: Checkout ml-testing-accelerators
            command: |
                git clone https://github.com/GoogleCloudPlatform/ml-testing-accelerators.git
                cd ml-testing-accelerators
                git fetch origin 5e88ac24f631c27045e62f0e8d5dfcf34e425e25:stable
                git checkout stable
    # Builds the image in docker/transformers-pytorch-tpu, tags it with the
    # workflow job id, and pushes it to $GCR_IMAGE_PATH.
    build_push_docker: &build_push_docker
        run:
            name: Configure Docker
            command: |
                gcloud --quiet auth configure-docker
                cd docker/transformers-pytorch-tpu
                # When CIRCLE_PR_NUMBER is set, also pass the PR head ref as the
                # GITHUB_REF build argument.
                if [ -z "$CIRCLE_PR_NUMBER" ]; then
                    docker build --tag "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID" -f Dockerfile --build-arg "TEST_IMAGE=1" .
                else
                    docker build --tag "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID" -f Dockerfile --build-arg "TEST_IMAGE=1" --build-arg "GITHUB_REF=pull/$CIRCLE_PR_NUMBER/head" .
                fi
                docker push "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID"
    # Renders the bert-base-cased jsonnet job spec, creates it on the cluster,
    # polls the Kubernetes job until it succeeds/fails (or the checks run out),
    # streams the pod logs, deletes the pushed image and exits with the job's
    # status code.
    deploy_cluster: &deploy_cluster
        run:
            name: Deploy the job on the kubernetes cluster
            command: |
                go get github.com/google/go-jsonnet/cmd/jsonnet && \
                export PATH=$PATH:$HOME/go/bin && \
                kubectl create -f docker/transformers-pytorch-tpu/dataset.yaml || true && \
                job_name=$(jsonnet -J ml-testing-accelerators/ docker/transformers-pytorch-tpu/bert-base-cased.jsonnet --ext-str image=$GCR_IMAGE_PATH --ext-str image-tag=$CIRCLE_WORKFLOW_JOB_ID | kubectl create -f -) && \
                job_name=${job_name#job.batch/} && \
                job_name=${job_name% created} && \
                echo "Waiting on kubernetes job: $job_name" && \
                i=0 && \
                # 30 checks spaced 30s apart = 900s total.
                max_checks=30 && \
                status_code=2 && \
                # Check on the job periodically. Set the status code depending on what
                # happened to the job in Kubernetes. If we try max_checks times and
                # still the job hasn't finished, give up and return the starting
                # non-zero status code.
                while [ $i -lt $max_checks ]; do ((i++)); if kubectl get jobs $job_name -o jsonpath='Failed:{.status.failed}' | grep "Failed:1"; then status_code=1 && break; elif kubectl get jobs $job_name -o jsonpath='Succeeded:{.status.succeeded}' | grep "Succeeded:1" ; then status_code=0 && break; else echo "Job not finished yet"; fi; sleep 30; done && \
                echo "Done waiting. Job status code: $status_code" && \
                pod_name=$(kubectl get po -l controller-uid=`kubectl get job $job_name -o "jsonpath={.metadata.labels.controller-uid}"` | awk 'match($0,!/NAME/) {print $1}') && \
                echo "GKE pod name: $pod_name" && \
                kubectl logs -f $pod_name --container=train
                # The chain above may stop before status_code is assigned (e.g. the
                # job could not be created). Default to a failure code in that case;
                # a bare `exit` would report the status of the preceding command.
                echo "Done with log retrieval attempt." && \
                gcloud container images delete "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID" --force-delete-tags && \
                exit "${status_code:-1}"
    # Lists and deletes Kubernetes jobs whose AGE column shows hours or days.
    delete_gke_jobs: &delete_gke_jobs
        run:
            name: Delete GKE Jobs
            command: |
                # Match jobs whose age matches patterns like '1h' or '1d', i.e. any job
                # that has been around longer than 1hr. First print all columns for
                # matches, then execute the delete.
                kubectl get job | awk 'match($4,/[0-9]+[dh]/) {print $0}'
                stale_jobs=$(kubectl get job | awk 'match($4,/[0-9]+[dh]/) {print $1}')
                # `kubectl delete job` without any job name is an error, so only
                # call it when at least one job matched.
                if [ -n "$stale_jobs" ]; then
                    kubectl delete job $stale_jobs
                else
                    echo "No GKE jobs older than 1h to delete"
                fi

jobs:
    # Runs the tests under ./tests/ selected by the `is_pt_tf_cross_test`
    # marker, with both the torch and tf-cpu extras installed.
    run_tests_torch_and_tf:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - skip-job-on-doc-only-changes
            - restore_cache:
                  keys:
                      - v0.4-torch_and_tf-{{ checksum "setup.py" }}
                      - v0.4-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[sklearn,tf-cpu,torch,testing,sentencepiece]
            # NOTE(review): this saves under the generic key, not the
            # torch_and_tf-specific key that is tried first on restore. The
            # generic key is also the fallback listed by the other jobs in this
            # file - confirm this is intentional before changing it.
            - save_cache:
                  key: v0.4-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            # Output is tee'd to a file so it can be uploaded as an artifact.
            - run: RUN_PT_TF_CROSS_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_torch_and_tf ./tests/ -m is_pt_tf_cross_test --durations=0 | tee tests_output.txt
            - store_artifacts:
                  path: ~/transformers/tests_output.txt
            - store_artifacts:
                  path: ~/transformers/reports

    # Runs everything under ./tests/ with the torch extra installed.
    run_tests_torch:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.7
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - skip-job-on-doc-only-changes
            - restore_cache:
                  keys:
                      - v0.4-torch-{{ checksum "setup.py" }}
                      - v0.4-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[sklearn,torch,testing,sentencepiece]
            - save_cache:
                  key: v0.4-torch-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            # Output is tee'd to a file so it can be uploaded as an artifact.
            - run: python -m pytest -n 8 --dist=loadfile -s --make-reports=tests_torch ./tests/ | tee tests_output.txt
            - store_artifacts:
                  path: ~/transformers/tests_output.txt
            - store_artifacts:
                  path: ~/transformers/reports

    # Runs everything under ./tests/ with the tf-cpu extra installed.
    run_tests_tf:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.7
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - skip-job-on-doc-only-changes
            - restore_cache:
                  keys:
                      - v0.4-tf-{{ checksum "setup.py" }}
                      - v0.4-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[sklearn,tf-cpu,testing,sentencepiece]
            - save_cache:
                  key: v0.4-tf-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            # Output is tee'd to a file so it can be uploaded as an artifact.
            - run: python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_tf ./tests/ | tee tests_output.txt
            - store_artifacts:
                  path: ~/transformers/tests_output.txt
            - store_artifacts:
                  path: ~/transformers/reports

    # Runs everything under ./tests/ with the flax and torch extras installed.
    run_tests_flax:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.7
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - skip-job-on-doc-only-changes
            - restore_cache:
                  keys:
                      - v0.4-flax-{{ checksum "setup.py" }}
                      - v0.4-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            # Installed without sudo, like every other job in this file, so the
            # install runs as the same user whose ~/.cache/pip is cached below.
            - run: pip install .[flax,sklearn,torch,testing,sentencepiece]
            - save_cache:
                  key: v0.4-flax-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            # Output is tee'd to a file so it can be uploaded as an artifact.
            - run: python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_flax ./tests/ | tee tests_output.txt
            - store_artifacts:
                  path: ~/transformers/tests_output.txt
            - store_artifacts:
                  path: ~/transformers/reports

    # Runs the tests under ./tests/ selected by the `is_pipeline_test` marker,
    # with the torch extra installed.
    run_tests_pipelines_torch:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.7
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - skip-job-on-doc-only-changes
            - restore_cache:
                  keys:
                      - v0.4-torch-{{ checksum "setup.py" }}
                      - v0.4-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[sklearn,torch,testing,sentencepiece]
            - save_cache:
                  key: v0.4-torch-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            # Output is tee'd to a file so it can be uploaded as an artifact.
            - run: RUN_PIPELINE_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_pipelines_torch -m is_pipeline_test ./tests/ | tee tests_output.txt
            - store_artifacts:
                  path: ~/transformers/tests_output.txt
            - store_artifacts:
                  path: ~/transformers/reports

    # Runs the tests under ./tests/ selected by the `is_pipeline_test` marker,
    # with the tf-cpu extra installed.
    run_tests_pipelines_tf:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.7
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - skip-job-on-doc-only-changes
            - restore_cache:
                  keys:
                      - v0.4-tf-{{ checksum "setup.py" }}
                      - v0.4-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[sklearn,tf-cpu,testing,sentencepiece]
            - save_cache:
                  key: v0.4-tf-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            # Output is tee'd to a file so it can be uploaded as an artifact.
            - run: RUN_PIPELINE_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_pipelines_tf ./tests/ -m is_pipeline_test | tee tests_output.txt
            - store_artifacts:
                  path: ~/transformers/tests_output.txt
            - store_artifacts:
                  path: ~/transformers/reports

    # Runs tests/test_tokenization_bert_japanese.py with the `ja` extra and the
    # unidic dictionary downloaded.
    run_tests_custom_tokenizers:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.7
        environment:
            # Quoted so the value stays the literal string "yes" instead of
            # being read as a YAML 1.1 boolean.
            RUN_CUSTOM_TOKENIZERS: "yes"
        steps:
            - checkout
            - skip-job-on-doc-only-changes
            - restore_cache:
                  keys:
                      - v0.4-custom_tokenizers-{{ checksum "setup.py" }}
                      - v0.4-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[ja,testing,sentencepiece]
            - run: python -m unidic download
            - save_cache:
                  key: v0.4-custom_tokenizers-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            # Output is tee'd to a file so it can be uploaded as an artifact.
            - run: python -m pytest -s --make-reports=tests_custom_tokenizers ./tests/test_tokenization_bert_japanese.py | tee tests_output.txt
            - store_artifacts:
                  path: ~/transformers/tests_output.txt
            - store_artifacts:
                  path: ~/transformers/reports

    # Runs the tests under ./examples/ with the torch extra plus
    # examples/requirements.txt installed.
    run_examples_torch:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - skip-job-on-doc-only-changes
            - restore_cache:
                  keys:
                      - v0.4-torch_examples-{{ checksum "setup.py" }}
                      - v0.4-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[sklearn,torch,sentencepiece,testing]
            - run: pip install -r examples/requirements.txt
            - save_cache:
                  key: v0.4-torch_examples-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            # Output is tee'd to a file so it can be uploaded as an artifact.
            - run: python -m pytest -n 8 --dist=loadfile -s --make-reports=examples_torch ./examples/ | tee examples_output.txt
            - store_artifacts:
                  path: ~/transformers/examples_output.txt
            - store_artifacts:
                  path: ~/transformers/reports

    # Builds the HTML documentation with Sphinx and uploads docs/_build as an
    # artifact. SPHINXOPTS="-W" makes Sphinx treat warnings as errors.
    build_doc:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.4-build_doc-{{ checksum "setup.py" }}
                      - v0.4-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install ."[all, docs]"
            - save_cache:
                  key: v0.4-build_doc-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: cd docs && make html SPHINXOPTS="-W"
            - store_artifacts:
                  path: ./docs/_build

    # Installs the package with the docs extras and runs .circleci/deploy.sh.
    # The SSH key with the fingerprint below is added before checkout.
    deploy_doc:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        steps:
            - add_ssh_keys:
                  fingerprints:
                      - "5b:7a:95:18:07:8c:aa:76:4c:60:35:88:ad:60:56:71"
            - checkout
            - restore_cache:
                  keys:
                      - v0.4-deploy_doc-{{ checksum "setup.py" }}
                      - v0.4-{{ checksum "setup.py" }}
            - run: pip install ."[all,docs]"
            - save_cache:
                  key: v0.4-deploy_doc-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: ./.circleci/deploy.sh

    # Style and consistency checks: black, isort and flake8 over
    # examples/tests/src/utils, followed by the repository's own check scripts
    # under utils/.
    check_code_quality:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        resource_class: medium
        parallelism: 1
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.4-code_quality-{{ checksum "setup.py" }}
                      - v0.4-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install isort
            - run: pip install .[all,quality]
            - save_cache:
                  key: v0.4-code_quality-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: black --check examples tests src utils
            - run: isort --check-only examples tests src utils
            - run: flake8 examples tests src utils
            - run: python utils/style_doc.py src/transformers docs/source --max_len 119 --check_only
            - run: python utils/check_copies.py
            - run: python utils/check_dummies.py
            - run: python utils/check_repo.py

    # Runs utils/link_tester.py; only `requests` is installed for it.
    check_repository_consistency:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        resource_class: small
        parallelism: 1
        steps:
            - checkout
            - run: pip install requests
            - run: python ./utils/link_tester.py

# TPU JOBS
    # Builds and pushes the TPU test image, then deploys it as a job on the
    # GKE cluster named by $GKE_CLUSTER (see the anchored steps under
    # `references`).
    run_examples_tpu:
        docker:
            - image: circleci/python:3.6
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - go/install
            - *checkout_ml_testing
            - gcp-gke/install
            - gcp-gke/update-kubeconfig-with-credentials:
                  cluster: $GKE_CLUSTER
                  perform-login: true
            - setup_remote_docker
            - *build_push_docker
            - *deploy_cluster

    # Connects to the GKE cluster named by $GKE_CLUSTER and runs the anchored
    # `delete_gke_jobs` step.
    cleanup-gke-jobs:
        docker:
            - image: circleci/python:3.6
        steps:
            - gcp-gke/install
            - gcp-gke/update-kubeconfig-with-credentials:
                  cluster: $GKE_CLUSTER
                  perform-login: true
            - *delete_gke_jobs

# Shared job filter: restricts a workflow job to the master branch.
workflow_filters: &workflow_filters
    filters:
        branches:
            only:
                - master
workflows:
    version: 2
    # Main workflow: quality checks, test suites and the doc build.
    # `deploy_doc` is limited to master through the shared filter.
    build_and_test:
        jobs:
            - check_code_quality
            - check_repository_consistency
            - run_examples_torch
            - run_tests_custom_tokenizers
            - run_tests_torch_and_tf
            - run_tests_torch
            - run_tests_tf
            - run_tests_flax
            - run_tests_pipelines_torch
            - run_tests_pipelines_tf
            - build_doc
            - deploy_doc: *workflow_filters
    # Scheduled workflow for the TPU jobs; only triggered on master.
    tpu_testing_jobs:
        triggers:
            - schedule:
                # Runs once a day at 08:00 (minute 0, hour 8); CircleCI
                # evaluates schedule cron expressions in UTC.
                cron: "0 8 * * *"
                filters:
                    branches:
                        only:
                            - master
        jobs:
            - cleanup-gke-jobs
            - run_examples_tpu