config.yml 17.1 KB
Newer Older
1
2
3
4
5
# CircleCI pipeline configuration.
version: 2.1
# Orbs are pinned to exact versions. `gcp-gke` supplies the `gcp-gke/install`
# and `gcp-gke/update-kubeconfig-with-credentials` steps and `go` supplies
# `go/install`; all three are used by the TPU jobs further down.
orbs:
    gcp-gke: circleci/gcp-gke@1.0.4
    go: circleci/go@1.3.0

6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
commands:
  # Reusable step: halts the calling job early (as a success) when the diff
  # between the pipeline's base revision and its current revision touches
  # nothing but .md / .rst files. Every job below currently has the call to
  # this command commented out.
  skip-job-on-doc-only-changes:
    description: >-
      Do not continue this job and exit with success for PRs with only doc changes
    steps:
      - run:
          name: docs-only changes skip check
          # `egrep -qv` succeeds as soon as one changed path does NOT end in
          # .md/.rst, i.e. when a non-doc file was modified.
          command: |
            if git diff --name-only << pipeline.git.base_revision >>...<< pipeline.git.revision >> | egrep -qv '\.(md|rst)$'
            then
                echo "Non-docs were modified in this PR, proceeding normally"
            else
                echo "Only docs were modified in this PR, quitting this job"
                circleci step halt
            fi

22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# TPU REFERENCES
references:
    # Reusable step (aliased from run_examples_tpu): clones
    # GoogleCloudPlatform/ml-testing-accelerators and checks out one pinned
    # commit as a local branch named `stable`, so the jsonnet library path used
    # by the deploy step does not drift with upstream.
    checkout_ml_testing: &checkout_ml_testing
        run:
            name: Checkout ml-testing-accelerators
            command: |
                git clone https://github.com/GoogleCloudPlatform/ml-testing-accelerators.git
                cd ml-testing-accelerators
                git fetch origin 5e88ac24f631c27045e62f0e8d5dfcf34e425e25:stable
                git checkout stable
    # Reusable step (aliased from run_examples_tpu): authenticates docker
    # against the registry via gcloud, builds the TPU test image from
    # docker/transformers-pytorch-tpu and pushes it tagged with the workflow
    # job id. When CIRCLE_PR_NUMBER is set, the build additionally receives
    # GITHUB_REF=pull/<number>/head as a build arg.
    # NOTE(review): the step name says "Configure Docker" but the step also
    # builds and pushes the image.
    build_push_docker: &build_push_docker
        run:
            name: Configure Docker
            command: |
                gcloud --quiet auth configure-docker
                cd docker/transformers-pytorch-tpu
                if [ -z "$CIRCLE_PR_NUMBER" ]; then docker build --tag "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID" -f Dockerfile --build-arg "TEST_IMAGE=1" . ; else docker build --tag "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID" -f Dockerfile --build-arg "TEST_IMAGE=1" --build-arg "GITHUB_REF=pull/$CIRCLE_PR_NUMBER/head" . ; fi
                docker push "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID"
    # Reusable step (aliased from run_examples_tpu): renders the
    # bert-base-cased jsonnet job for the freshly pushed image, creates it on
    # the cluster, polls its status, streams the `train` container logs,
    # deletes the image and exits with a code reflecting the job outcome
    # (0 = succeeded, 1 = failed, 2 = did not finish / could not be started).
    deploy_cluster: &deploy_cluster
        run:
            name: Deploy the job on the kubernetes cluster
            command: |
                go get github.com/google/go-jsonnet/cmd/jsonnet && \
                export PATH=$PATH:$HOME/go/bin && \
                kubectl create -f docker/transformers-pytorch-tpu/dataset.yaml || true && \
                job_name=$(jsonnet -J ml-testing-accelerators/ docker/transformers-pytorch-tpu/bert-base-cased.jsonnet --ext-str image=$GCR_IMAGE_PATH --ext-str image-tag=$CIRCLE_WORKFLOW_JOB_ID | kubectl create -f -) && \
                job_name=${job_name#job.batch/} && \
                job_name=${job_name% created} && \
                echo "Waiting on kubernetes job: $job_name" && \
                i=0 && \
                # 30 checks spaced 30s apart = 900s total.
                max_checks=30 && \
                status_code=2 && \
                # Check on the job periodically. Set the status code depending on what
                # happened to the job in Kubernetes. If we try max_checks times and
                # still the job hasn't finished, give up and return the starting
                # non-zero status code.
                while [ $i -lt $max_checks ]; do ((i++)); if kubectl get jobs $job_name -o jsonpath='Failed:{.status.failed}' | grep "Failed:1"; then status_code=1 && break; elif kubectl get jobs $job_name -o jsonpath='Succeeded:{.status.succeeded}' | grep "Succeeded:1" ; then status_code=0 && break; else echo "Job not finished yet"; fi; sleep 30; done && \
                echo "Done waiting. Job status code: $status_code" && \
                pod_name=$(kubectl get po -l controller-uid=`kubectl get job $job_name -o "jsonpath={.metadata.labels.controller-uid}"` | awk 'match($0,!/NAME/) {print $1}') && \
                echo "GKE pod name: $pod_name" && \
                kubectl logs -f $pod_name --container=train
                # The list above is deliberately not chained to the cleanup below.
                # Because a failure in the middle of an `&&` list does not trigger
                # errexit, execution can arrive here with status_code never set
                # (e.g. the job could not be created). Default to 2 in that case so
                # a bare `exit` does not report the image deletion's success.
                echo "Done with log retrieval attempt." && \
                gcloud container images delete "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID" --force-delete-tags && \
                exit "${status_code:-2}"
    # Reusable step (aliased from cleanup-gke-jobs): lists and then deletes
    # Kubernetes jobs whose AGE column (4th column of `kubectl get job`)
    # contains an hour or day component.
    delete_gke_jobs: &delete_gke_jobs
        run:
            name: Delete GKE Jobs
            command: |
                # Match jobs whose age matches patterns like '1h' or '1d', i.e. any job
                # that has been around longer than 1hr. First print all columns for
                # matches, then execute the delete.
                kubectl get job | awk 'match($4,/[0-9]+[dh]/) {print $0}'
                old_jobs=$(kubectl get job | awk 'match($4,/[0-9]+[dh]/) {print $1}')
                # `kubectl delete job` with no names is an error, which would fail
                # this cleanup whenever there is simply nothing old enough to delete.
                if [ -n "$old_jobs" ]; then
                    kubectl delete job $old_jobs
                else
                    echo "No GKE jobs older than 1h to delete."
                fi




Julien Chaumond's avatar
Julien Chaumond committed
80
jobs:
Aymeric Augustin's avatar
Aymeric Augustin committed
81
    # Runs the tests marked `is_pt_tf_cross_test` with both PyTorch and
    # TensorFlow (CPU) installed, on Python 3.6, and stores the pytest output
    # and reports as artifacts.
    run_tests_torch_and_tf:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            # - skip-job-on-doc-only-changes
            - restore_cache:
                  keys:
                      - v0.4-torch_and_tf-{{ checksum "setup.py" }}
                      - v0.4-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[sklearn,tf-cpu,torch,testing,sentencepiece]
            # NOTE(review): unlike the other jobs, this one saves under the
            # generic key (the fallback restore key of every job) rather than
            # under `v0.4-torch_and_tf-...`, so the job-specific restore key
            # above is never written here. Confirm this is intentional.
            - save_cache:
                  key: v0.4-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: RUN_PT_TF_CROSS_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_torch_and_tf ./tests/ -m is_pt_tf_cross_test --durations=0 | tee tests_output.txt
            - store_artifacts:
                  path: ~/transformers/tests_output.txt
            - store_artifacts:
                  path: ~/transformers/reports

Aymeric Augustin's avatar
Aymeric Augustin committed
108
    # Runs the full ./tests/ suite with only PyTorch installed, on Python 3.7,
    # and stores the pytest output and reports as artifacts.
    run_tests_torch:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.7
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            # - skip-job-on-doc-only-changes
            - restore_cache:
                  keys:
                      - v0.4-torch-{{ checksum "setup.py" }}
                      - v0.4-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[sklearn,torch,testing,sentencepiece]
            - save_cache:
                  key: v0.4-torch-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: python -m pytest -n 8 --dist=loadfile -s --make-reports=tests_torch ./tests/ | tee tests_output.txt
            - store_artifacts:
                  path: ~/transformers/tests_output.txt
            - store_artifacts:
                  path: ~/transformers/reports
Lysandre Debut's avatar
Lysandre Debut committed
134

Aymeric Augustin's avatar
Aymeric Augustin committed
135
    # Runs the full ./tests/ suite with only TensorFlow (CPU) installed, on
    # Python 3.7, and stores the pytest output and reports as artifacts.
    run_tests_tf:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.7
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            # - skip-job-on-doc-only-changes
            - restore_cache:
                  keys:
                      - v0.4-tf-{{ checksum "setup.py" }}
                      - v0.4-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[sklearn,tf-cpu,testing,sentencepiece]
            - save_cache:
                  key: v0.4-tf-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_tf ./tests/ | tee tests_output.txt
            - store_artifacts:
                  path: ~/transformers/tests_output.txt
            - store_artifacts:
                  path: ~/transformers/reports

162
163
164
165
166
167
168
169
170
171
    # Runs the full ./tests/ suite with the flax and torch extras installed,
    # on Python 3.7, and stores the pytest output and reports as artifacts.
    run_tests_flax:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.7
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            # - skip-job-on-doc-only-changes
            - restore_cache:
                  keys:
                      - v0.4-flax-{{ checksum "setup.py" }}
                      - v0.4-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            # NOTE(review): this is the only job that installs with `sudo`;
            # presumably pip then does not populate the `~/.cache/pip`
            # directory saved below. Confirm whether `sudo` is still needed.
            - run: sudo pip install .[flax,sklearn,torch,testing,sentencepiece]
            - save_cache:
                  key: v0.4-flax-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_flax ./tests/ | tee tests_output.txt
            - store_artifacts:
                  path: ~/transformers/tests_output.txt
            - store_artifacts:
                  path: ~/transformers/reports

189
190
191
192
193
194
195
196
197
198
    # Runs the tests marked `is_pipeline_test` with only PyTorch installed,
    # on Python 3.7. Shares the `v0.4-torch-...` pip cache key with
    # run_tests_torch.
    run_tests_pipelines_torch:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.7
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            # - skip-job-on-doc-only-changes
            - restore_cache:
                  keys:
                      - v0.4-torch-{{ checksum "setup.py" }}
                      - v0.4-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[sklearn,torch,testing,sentencepiece]
            - save_cache:
                  key: v0.4-torch-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: RUN_PIPELINE_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_pipelines_torch -m is_pipeline_test ./tests/ | tee tests_output.txt
            - store_artifacts:
                  path: ~/transformers/tests_output.txt
            - store_artifacts:
                  path: ~/transformers/reports

216
217
218
219
220
221
222
223
224
225
    # Runs the tests marked `is_pipeline_test` with only TensorFlow (CPU)
    # installed, on Python 3.7. Shares the `v0.4-tf-...` pip cache key with
    # run_tests_tf.
    run_tests_pipelines_tf:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.7
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            # - skip-job-on-doc-only-changes
            - restore_cache:
                  keys:
                      - v0.4-tf-{{ checksum "setup.py" }}
                      - v0.4-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[sklearn,tf-cpu,testing,sentencepiece]
            - save_cache:
                  key: v0.4-tf-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: RUN_PIPELINE_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_pipelines_tf ./tests/ -m is_pipeline_test | tee tests_output.txt
            - store_artifacts:
                  path: ~/transformers/tests_output.txt
            - store_artifacts:
                  path: ~/transformers/reports

Aymeric Augustin's avatar
Aymeric Augustin committed
243
    # Runs the Japanese BERT tokenizer tests (the `ja` extra plus the unidic
    # dictionary download) on Python 3.7 and stores the pytest output and
    # reports as artifacts.
    run_tests_custom_tokenizers:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.7
        environment:
            # Quoted so the value stays the literal string "yes"; a bare `yes`
            # is a boolean under YAML 1.1.
            # NOTE(review): assumes the test suite accepts "yes" for this flag
            # — confirm against the code that reads RUN_CUSTOM_TOKENIZERS.
            RUN_CUSTOM_TOKENIZERS: "yes"
        steps:
            - checkout
            # - skip-job-on-doc-only-changes
            - restore_cache:
                  keys:
                      - v0.4-custom_tokenizers-{{ checksum "setup.py" }}
                      - v0.4-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[ja,testing,sentencepiece]
            - run: python -m unidic download
            - save_cache:
                  key: v0.4-custom_tokenizers-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: python -m pytest -s --make-reports=tests_custom_tokenizers ./tests/test_tokenization_bert_japanese.py | tee tests_output.txt
            - store_artifacts:
                  path: ~/transformers/tests_output.txt
            - store_artifacts:
                  path: ~/transformers/reports

Aymeric Augustin's avatar
Aymeric Augustin committed
269
    # Runs the tests under ./examples/ with PyTorch and the examples'
    # requirements installed, on Python 3.6, and stores the pytest output
    # (examples_output.txt) and reports as artifacts.
    run_examples_torch:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            # - skip-job-on-doc-only-changes
            - restore_cache:
                  keys:
                      - v0.4-torch_examples-{{ checksum "setup.py" }}
                      - v0.4-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[sklearn,torch,sentencepiece,testing]
            - run: pip install -r examples/requirements.txt
            - save_cache:
                  key: v0.4-torch_examples-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: python -m pytest -n 8 --dist=loadfile -s --make-reports=examples_torch ./examples/ | tee examples_output.txt
            - store_artifacts:
                  path: ~/transformers/examples_output.txt
            - store_artifacts:
                  path: ~/transformers/reports
296

297
298
299
300
301
302
    # Builds the HTML documentation with Sphinx, treating warnings as errors
    # (SPHINXOPTS="-W"), and stores the build output as an artifact.
    build_doc:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.4-build_doc-{{ checksum "setup.py" }}
                      - v0.4-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            # Extras list written without inner whitespace, matching deploy_doc.
            - run: pip install ."[all,docs]"
            - save_cache:
                  key: v0.4-build_doc-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: cd docs && make html SPHINXOPTS="-W"
            - store_artifacts:
                  path: ./docs/_build
316

LysandreJik's avatar
LysandreJik committed
317
    # Installs the docs dependencies and runs .circleci/deploy.sh with the SSH
    # key identified by the fingerprint below. In the build_and_test workflow
    # this job is restricted to the master branch.
    deploy_doc:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        steps:
            - add_ssh_keys:
                  fingerprints:
                      - "5b:7a:95:18:07:8c:aa:76:4c:60:35:88:ad:60:56:71"
            - checkout
            - restore_cache:
                  keys:
                      - v0.4-deploy_doc-{{ checksum "setup.py" }}
                      - v0.4-{{ checksum "setup.py" }}
            - run: pip install ."[all,docs]"
            - save_cache:
                  key: v0.4-deploy_doc-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: ./.circleci/deploy.sh
336

Aymeric Augustin's avatar
Aymeric Augustin committed
337
338
339
340
    # Static checks: black, isort and flake8 over examples/tests/src/utils,
    # followed by the repository's own style and consistency scripts under
    # utils/.
    check_code_quality:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        resource_class: medium
        parallelism: 1
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.4-code_quality-{{ checksum "setup.py" }}
                      - v0.4-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install isort
            - run: pip install .[all,quality]
            - save_cache:
                  key: v0.4-code_quality-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: black --check examples tests src utils
            - run: isort --check-only examples tests src utils
            - run: flake8 examples tests src utils
            - run: python utils/style_doc.py src/transformers docs/source --max_len 119 --check_only
            - run: python utils/check_copies.py
            - run: python utils/check_dummies.py
            - run: python utils/check_repo.py
363

364
    # Lightweight job: installs `requests` and runs utils/link_tester.py.
    # No pip cache is restored or saved here.
    check_repository_consistency:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        resource_class: small
        parallelism: 1
        steps:
            - checkout
            - run: pip install requests
            - run: python ./utils/link_tester.py
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393

# TPU JOBS
    # TPU example run: installs Go and the GKE tooling, authenticates against
    # $GKE_CLUSTER, then builds/pushes the test image and deploys it to the
    # cluster using the reusable steps declared under `references`.
    run_examples_tpu:
        docker:
            - image: circleci/python:3.6
        resource_class: xlarge
        parallelism: 1
        environment:
            OMP_NUM_THREADS: 1
        steps:
            - checkout
            - go/install
            - *checkout_ml_testing
            - gcp-gke/install
            - gcp-gke/update-kubeconfig-with-credentials:
                  cluster: $GKE_CLUSTER
                  perform-login: true
            # Remote docker is set up before the image build/push step.
            - setup_remote_docker
            - *build_push_docker
            - *deploy_cluster
394

395
396
397
398
399
400
401
402
403
    # Housekeeping job: authenticates against $GKE_CLUSTER and runs the
    # reusable `delete_gke_jobs` step. There is no `checkout` step, so this
    # job works without the repository sources.
    cleanup-gke-jobs:
        docker:
            - image: circleci/python:3.6
        steps:
            - gcp-gke/install
            - gcp-gke/update-kubeconfig-with-credentials:
                  cluster: $GKE_CLUSTER
                  perform-login: true
            - *delete_gke_jobs
404

LysandreJik's avatar
LysandreJik committed
405
406
407
408
# Shared branch filter: jobs that alias this only run on `master`
# (used by deploy_doc in the build_and_test workflow).
workflow_filters: &workflow_filters
    filters:
        branches:
            only:
                - master
410
workflows:
    version: 2
    # Main CI workflow. No job declares `requires`, so none waits on another;
    # deploy_doc is additionally limited to master via the shared filter.
    build_and_test:
        jobs:
            - check_code_quality
            - check_repository_consistency
            - run_examples_torch
            - run_tests_custom_tokenizers
            - run_tests_torch_and_tf
            - run_tests_torch
            - run_tests_tf
            - run_tests_flax
            - run_tests_pipelines_torch
            - run_tests_pipelines_tf
            - build_doc
            - deploy_doc: *workflow_filters
Lysandre's avatar
Lysandre committed
426
427
428
429
430
431
432
433
434
435
436
437
    # Scheduled workflow (master only): cleans up old GKE jobs and runs the
    # TPU examples. The two jobs declare no ordering between them.
    tpu_testing_jobs:
        triggers:
            - schedule:
                # Cron "0 8 * * *" fires once a day, at minute 0 of hour 8
                # (NOTE(review): presumably UTC, per CircleCI scheduling — confirm).
                cron: "0 8 * * *"
                filters:
                    branches:
                        only:
                            - master
        jobs:
            - cleanup-gke-jobs
            - run_examples_tpu