version: 2.1
orbs:
    gcp-gke: circleci/gcp-gke@1.0.4
    go: circleci/go@1.3.0

# TPU REFERENCES
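# The anchors below (&checkout_ml_testing, &build_push_docker, &deploy_cluster,
# &delete_gke_jobs) are consumed by the TPU jobs at the bottom of this file via
# *aliases.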
references:
    checkout_ml_testing: &checkout_ml_testing
        run:
            name: Checkout ml-testing-accelerators
            command: |
                git clone https://github.com/GoogleCloudPlatform/ml-testing-accelerators.git
                cd ml-testing-accelerators
                git fetch origin 5e88ac24f631c27045e62f0e8d5dfcf34e425e25:stable
                git checkout stable
    build_push_docker: &build_push_docker
        run:
            name: Configure Docker
            command: |
                gcloud --quiet auth configure-docker
                cd docker/transformers-pytorch-tpu
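                # Non-PR builds use the default branch; pull-request builds additionally
                # pass GITHUB_REF=pull/<PR number>/head as a build-arg so the image is
                # built from the PR branch.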
                if [ -z "$CIRCLE_PR_NUMBER" ]; then docker build --tag "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID" -f Dockerfile --build-arg "TEST_IMAGE=1" . ; else docker build --tag "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID" -f Dockerfile --build-arg "TEST_IMAGE=1" --build-arg "GITHUB_REF=pull/$CIRCLE_PR_NUMBER/head" . ; fi
                docker push "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID"
    deploy_cluster: &deploy_cluster
        run:
            name: Deploy the job on the kubernetes cluster
            command: |
                go get github.com/google/go-jsonnet/cmd/jsonnet && \
                export PATH=$PATH:$HOME/go/bin && \
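                # `|| true`: don't fail the step if creating the dataset errors out,
                # e.g. because the object already exists from a previous run.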
                kubectl create -f docker/transformers-pytorch-tpu/dataset.yaml || true && \
                job_name=$(jsonnet -J ml-testing-accelerators/ docker/transformers-pytorch-tpu/bert-base-cased.jsonnet --ext-str image=$GCR_IMAGE_PATH --ext-str image-tag=$CIRCLE_WORKFLOW_JOB_ID | kubectl create -f -) && \
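                # `kubectl create` prints "job.batch/<name> created"; strip the prefix
                # and the suffix to recover the bare job name.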
                job_name=${job_name#job.batch/} && \
                job_name=${job_name% created} && \
                echo "Waiting on kubernetes job: $job_name" && \
                i=0 && \
                # 30 checks spaced 30s apart = 900s total.
                max_checks=30 && \
                status_code=2 && \
                # Check on the job periodically. Set the status code depending on what
                # happened to the job in Kubernetes. If we try max_checks times and
                # still the job hasn't finished, give up and return the starting
                # non-zero status code.
                while [ $i -lt $max_checks ]; do ((i++)); if kubectl get jobs $job_name -o jsonpath='Failed:{.status.failed}' | grep "Failed:1"; then status_code=1 && break; elif kubectl get jobs $job_name -o jsonpath='Succeeded:{.status.succeeded}' | grep "Succeeded:1" ; then status_code=0 && break; else echo "Job not finished yet"; fi; sleep 30; done && \
                echo "Done waiting. Job status code: $status_code" && \
                pod_name=$(kubectl get po -l controller-uid=`kubectl get job $job_name -o "jsonpath={.metadata.labels.controller-uid}"` | awk 'match($0,!/NAME/) {print $1}') && \
                echo "GKE pod name: $pod_name" && \
                kubectl logs -f $pod_name --container=train
                echo "Done with log retrieval attempt." && \
                gcloud container images delete "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID" --force-delete-tags && \
                exit $status_code
    delete_gke_jobs: &delete_gke_jobs
        run:
            name: Delete GKE Jobs
            command: |
                # Match jobs whose age matches patterns like '1h' or '1d', i.e. any job
                # that has been around longer than 1hr. First print all columns for
                # matches, then execute the delete.
                kubectl get job | awk 'match($4,/[0-9]+[dh]/) {print $0}'
                kubectl delete job $(kubectl get job | awk 'match($4,/[0-9]+[dh]/) {print $1}')




jobs:
    run_tests_torch_and_tf:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
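            # Restore the job-specific pip cache if one matches, otherwise fall back to
            # the shared v0.3 cache keyed only on setup.py.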
            - restore_cache:
                  keys:
                      - v0.3-torch_and_tf-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install git+https://github.com/huggingface/datasets
            - run: pip install .[sklearn,tf-cpu,torch,testing]
            - run: pip install codecov pytest-cov
            - save_cache:
                  key: v0.3-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: RUN_PT_TF_CROSS_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s ./tests/ -m is_pt_tf_cross_test --cov --durations=0 | tee output.txt
            - run: codecov
            - store_artifacts:
                  path: ~/transformers/output.txt
                  destination: test_output.txt
    run_tests_torch:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.7
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-torch-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install git+https://github.com/huggingface/datasets
            - run: pip install .[sklearn,torch,testing]
            - save_cache:
                  key: v0.3-torch-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
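            # --make_reports is a custom pytest option (assumed to be defined in the
            # repository's conftest.py) that writes report files under ./reports, which
            # are uploaded as artifacts below.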
            - run: python -m pytest -n 8 --dist=loadfile -s --make_reports=tests ./tests/ | tee tests_output.txt
            - store_artifacts:
                  path: ~/transformers/tests_output.txt
            - store_artifacts:
                  path: ~/transformers/reports
                  
    run_tests_tf:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.7
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-tf-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install git+https://github.com/huggingface/datasets
            - run: pip install .[sklearn,tf-cpu,testing]
            - save_cache:
                  key: v0.3-tf-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: python -m pytest -n 8 --dist=loadfile -rA -s ./tests/ | tee output.txt
            - store_artifacts:
                  path: ~/transformers/output.txt
                  destination: test_output.txt
    run_tests_flax:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.7
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-flax-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install git+https://github.com/huggingface/datasets
            - run: sudo pip install .[flax,sklearn,torch,testing]
            - save_cache:
                  key: v0.3-flax-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: python -m pytest -n 8 --dist=loadfile -rA -s ./tests/ | tee output.txt
            - store_artifacts:
                  path: ~/transformers/output.txt
                  destination: test_output.txt
    run_tests_pipelines_torch:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.7
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-torch-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install git+https://github.com/huggingface/datasets
            - run: pip install .[sklearn,torch,testing]
            - save_cache:
                  key: v0.3-torch-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: RUN_PIPELINE_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s ./tests/ -m is_pipeline_test | tee output.txt
            - store_artifacts:
                  path: ~/transformers/output.txt
                  destination: test_output.txt
    run_tests_pipelines_tf:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.7
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-tf-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install git+https://github.com/huggingface/datasets
            - run: pip install .[sklearn,tf-cpu,testing]
            - save_cache:
                  key: v0.3-tf-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: RUN_PIPELINE_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s ./tests/ -m is_pipeline_test | tee output.txt
            - store_artifacts:
                  path: ~/transformers/output.txt
                  destination: test_output.txt
    run_tests_custom_tokenizers:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        environment:
            RUN_CUSTOM_TOKENIZERS: yes
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-custom_tokenizers-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[ja,testing]
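            # unidic supplies the Japanese dictionary required by the BERT Japanese
            # tokenizer tests run below.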
            - run: python -m unidic download
            - save_cache:
                  key: v0.3-custom_tokenizers-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: python -m pytest -s ./tests/test_tokenization_bert_japanese.py | tee output.txt
            - store_artifacts:
                path: ~/transformers/output.txt
                destination: test_output.txt
    run_examples_torch:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-torch_examples-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[sklearn,torch,testing]
            - run: pip install -r examples/requirements.txt
            - save_cache:
                  key: v0.3-torch_examples-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: python -m pytest -n 8 --dist=loadfile -s --make_reports=examples ./examples/ | tee examples_output.txt
            - store_artifacts:
                  path: ~/transformers/examples_output.txt
            - store_artifacts:
                  path: ~/transformers/reports
    build_doc:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-build_doc-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[tf,torch,sentencepiece,docs]
            - save_cache:
                  key: v0.3-build_doc-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: cd docs && make html SPHINXOPTS="-W"
            - store_artifacts:
                path: ./docs/_build
    deploy_doc:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        steps:
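            # add_ssh_keys installs the SSH key registered under this fingerprint in the
            # CircleCI project settings (presumably used by deploy.sh further down).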
            - add_ssh_keys:
                fingerprints:
                    - "5b:7a:95:18:07:8c:aa:76:4c:60:35:88:ad:60:56:71"
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-deploy_doc-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install .[tf,torch,sentencepiece,docs]
            - save_cache:
                  key: v0.3-deploy_doc-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: ./.circleci/deploy.sh
    check_code_quality:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        resource_class: medium
        parallelism: 1
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-code_quality-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install isort
            - run: pip install .[tf,torch,flax,quality]
            - save_cache:
                  key: v0.3-code_quality-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
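            # Formatting checks (black, isort, flake8) followed by repository-specific
            # consistency scripts.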
            - run: black --check examples templates tests src utils
            - run: isort --check-only examples templates tests src utils
            - run: flake8 examples templates tests src utils
            - run: python utils/style_doc.py src/transformers docs/source --max_len 119 --check_only
            - run: python utils/check_copies.py
            - run: python utils/check_dummies.py
            - run: python utils/check_repo.py
    check_repository_consistency:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        resource_class: small
        parallelism: 1
        steps:
            - checkout
            - run: pip install requests
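            # link_tester.py is assumed to scan the source tree for URLs and verify that
            # they respond, hence the `requests` dependency.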
            - run: python ./utils/link_tester.py

# TPU JOBS
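# run_examples_tpu builds the TPU test image, pushes it to GCR, and launches it as a
# Kubernetes Job on the GKE cluster using the reference anchors defined at the top of
# this file; cleanup-gke-jobs prunes jobs older than one hour.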
    run_examples_tpu:
        docker:
            - image: circleci/python:3.6
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - go/install
            - *checkout_ml_testing
            - gcp-gke/install
            - gcp-gke/update-kubeconfig-with-credentials:
                  cluster: $GKE_CLUSTER
                  perform-login: true
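            # setup_remote_docker provisions a remote Docker engine so the image can be
            # built and pushed from this job.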
            - setup_remote_docker
            - *build_push_docker
            - *deploy_cluster
    cleanup-gke-jobs:
        docker:
            - image: circleci/python:3.6
        steps:
            - gcp-gke/install
            - gcp-gke/update-kubeconfig-with-credentials:
                  cluster: $GKE_CLUSTER
                  perform-login: true
            - *delete_gke_jobs
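# Shared filter: restrict a job to the master branch (applied to deploy_doc below).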
workflow_filters: &workflow_filters
    filters:
        branches:
            only:
                - master
workflows:
    version: 2
    build_and_test:
        jobs:
            - check_code_quality
            - check_repository_consistency
            - run_examples_torch
            - run_tests_custom_tokenizers
            - run_tests_torch_and_tf
            - run_tests_torch
            - run_tests_tf
            - run_tests_flax
            - run_tests_pipelines_torch
            - run_tests_pipelines_tf
            - build_doc
            - deploy_doc: *workflow_filters
    tpu_testing_jobs:
        triggers:
            - schedule:
                # Scheduled to run once a day at 08:00 UTC.
                cron: "0 8 * * *"
                filters:
                    branches:
                        only:
                            - master
        jobs:
            - cleanup-gke-jobs
            - run_examples_tpu