version: 2.1
orbs:
    gcp-gke: circleci/gcp-gke@1.0.4
    go: circleci/go@1.3.0

# TPU REFERENCES
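# The entries below define YAML anchors (&name) that the TPU jobs at the bottom of
# this file reuse via aliases (*name), so each step is written once and shared.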
references:
    checkout_ml_testing: &checkout_ml_testing
        run:
            name: Checkout ml-testing-accelerators
            command: |
                git clone https://github.com/GoogleCloudPlatform/ml-testing-accelerators.git
                cd ml-testing-accelerators
                git fetch origin 5e88ac24f631c27045e62f0e8d5dfcf34e425e25:stable
                git checkout stable
    build_push_docker: &build_push_docker
        run:
            name: Configure Docker
            command: |
                gcloud --quiet auth configure-docker
                cd docker/transformers-pytorch-tpu
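                # Pull-request builds pass GITHUB_REF so the Dockerfile can check out the PR head;
                # builds on a branch use the default ref.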
                if [ -z "$CIRCLE_PR_NUMBER" ]; then docker build --tag "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID" -f Dockerfile --build-arg "TEST_IMAGE=1" . ; else docker build --tag "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID" -f Dockerfile --build-arg "TEST_IMAGE=1" --build-arg "GITHUB_REF=pull/$CIRCLE_PR_NUMBER/head" . ; fi
                docker push "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID"
    deploy_cluster: &deploy_cluster
        run:
            name: Deploy the job on the kubernetes cluster
            command: |
                go get github.com/google/go-jsonnet/cmd/jsonnet && \
                export PATH=$PATH:$HOME/go/bin && \
                kubectl create -f docker/transformers-pytorch-tpu/dataset.yaml || true && \
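                # Render the job spec with jsonnet and submit it; `kubectl create` prints
                # "job.batch/NAME created", which the next two lines strip down to NAME.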
                job_name=$(jsonnet -J ml-testing-accelerators/ docker/transformers-pytorch-tpu/bert-base-cased.jsonnet --ext-str image=$GCR_IMAGE_PATH --ext-str image-tag=$CIRCLE_WORKFLOW_JOB_ID | kubectl create -f -) && \
                job_name=${job_name#job.batch/} && \
                job_name=${job_name% created} && \
                echo "Waiting on kubernetes job: $job_name" && \
                i=0 && \
                # 30 checks spaced 30s apart = 900s total.
                max_checks=30 && \
                status_code=2 && \
                # Check on the job periodically. Set the status code depending on what
                # happened to the job in Kubernetes. If we try max_checks times and
                # still the job hasn't finished, give up and return the starting
                # non-zero status code.
                while [ $i -lt $max_checks ]; do ((i++)); if kubectl get jobs $job_name -o jsonpath='Failed:{.status.failed}' | grep "Failed:1"; then status_code=1 && break; elif kubectl get jobs $job_name -o jsonpath='Succeeded:{.status.succeeded}' | grep "Succeeded:1" ; then status_code=0 && break; else echo "Job not finished yet"; fi; sleep 30; done && \
                echo "Done waiting. Job status code: $status_code" && \
                pod_name=$(kubectl get po -l controller-uid=`kubectl get job $job_name -o "jsonpath={.metadata.labels.controller-uid}"` | awk 'match($0,!/NAME/) {print $1}') && \
                echo "GKE pod name: $pod_name" && \
                kubectl logs -f $pod_name --container=train
                echo "Done with log retrieval attempt." && \
                gcloud container images delete "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID" --force-delete-tags && \
                exit $status_code
    delete_gke_jobs: &delete_gke_jobs
        run:
            name: Delete GKE Jobs
            command: |
                # Match jobs whose age matches patterns like '1h' or '1d', i.e. any job
                # that has been around longer than 1hr. First print all columns for
                # matches, then execute the delete.
                kubectl get job | awk 'match($4,/[0-9]+[dh]/) {print $0}'
                kubectl delete job $(kubectl get job | awk 'match($4,/[0-9]+[dh]/) {print $1}')




jobs:
    run_tests_torch_and_tf:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
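            # restore_cache tries the most specific key first and falls back to the shared v0.3- prefix.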
            - restore_cache:
                  keys:
                      - v0.3-torch_and_tf-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install git+https://github.com/huggingface/datasets
            - run: pip install .[sklearn,tf-cpu,torch,testing]
            - run: pip install codecov pytest-cov
            - save_cache:
                  key: v0.3-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
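            # pytest-xdist: -n 8 runs tests on 8 workers; --dist=loadfile keeps tests from the same file on one worker.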
            - run: RUN_PT_TF_CROSS_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s ./tests/ -m is_pt_tf_cross_test --cov --durations=0 | tee output.txt
            - run: codecov
            - store_artifacts:
                  path: ~/transformers/output.txt
                  destination: test_output.txt
    run_tests_torch:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.7
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-torch-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install git+https://github.com/huggingface/datasets
            - run: pip install .[sklearn,torch,testing]
            - save_cache:
                  key: v0.3-torch-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: python -m pytest -n 8 --dist=loadfile -rA -s ./tests/ | tee output.txt
            - store_artifacts:
                  path: ~/transformers/output.txt
                  destination: test_output.txt
    run_tests_tf:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.7
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-tf-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install git+https://github.com/huggingface/datasets
            - run: pip install .[sklearn,tf-cpu,testing]
            - save_cache:
                  key: v0.3-tf-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: python -m pytest -n 8 --dist=loadfile -rA -s ./tests/ | tee output.txt
            - store_artifacts:
                  path: ~/transformers/output.txt
                  destination: test_output.txt
    run_tests_flax:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.7
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-flax-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install git+https://github.com/huggingface/datasets
            - run: sudo pip install .[flax,sklearn,torch,testing]
            - save_cache:
                  key: v0.3-flax-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: python -m pytest -n 8 --dist=loadfile -rA -s ./tests/ | tee output.txt
            - store_artifacts:
                  path: ~/transformers/output.txt
                  destination: test_output.txt
    run_tests_pipelines_torch:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.7
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-torch-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install git+https://github.com/huggingface/datasets
            - run: pip install .[sklearn,torch,testing]
            - save_cache:
                  key: v0.3-torch-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: RUN_PIPELINE_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s ./tests/ -m is_pipeline_test | tee output.txt
            - store_artifacts:
                  path: ~/transformers/output.txt
                  destination: test_output.txt
    run_tests_pipelines_tf:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.7
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-tf-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install git+https://github.com/huggingface/datasets
            - run: pip install .[sklearn,tf-cpu,testing]
            - save_cache:
                  key: v0.3-tf-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: RUN_PIPELINE_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s ./tests/ -m is_pipeline_test | tee output.txt
            - store_artifacts:
                  path: ~/transformers/output.txt
                  destination: test_output.txt
    run_tests_custom_tokenizers:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        environment:
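            # Enables the tokenizer tests that need extra language-specific dependencies
            # (the Japanese tokenizers exercised by the test file below).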
            RUN_CUSTOM_TOKENIZERS: yes
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-custom_tokenizers-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[ja,testing]
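            # Download the UniDic dictionary used by the Japanese tokenizer backends.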
            - run: python -m unidic download
            - save_cache:
                  key: v0.3-custom_tokenizers-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: python -m pytest -s ./tests/test_tokenization_bert_japanese.py | tee output.txt
            - store_artifacts:
                  path: ~/transformers/output.txt
                  destination: test_output.txt
    run_examples_torch:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-torch_examples-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[sklearn,torch,testing]
            - run: pip install -r examples/requirements.txt
            - save_cache:
                  key: v0.3-torch_examples-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: python -m pytest -n 8 --dist=loadfile -rA -s ./examples/ | tee output.txt
            - store_artifacts:
                  path: ~/transformers/output.txt
                  destination: test_output.txt
    build_doc:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-build_doc-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[tf,torch,sentencepiece,docs]
            - save_cache:
                  key: v0.3-build_doc-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: cd docs && make html SPHINXOPTS="-W"
            - store_artifacts:
                path: ./docs/_build
    deploy_doc:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        steps:
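            # Load the deploy key registered in CircleCI (matched by its fingerprint) so the
            # deploy script below can push the built documentation.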
            - add_ssh_keys:
                fingerprints:
                    - "5b:7a:95:18:07:8c:aa:76:4c:60:35:88:ad:60:56:71"
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-deploy_doc-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install .[tf,torch,sentencepiece,docs]
            - save_cache:
                  key: v0.3-deploy_doc-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: ./.circleci/deploy.sh
    check_code_quality:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        resource_class: medium
        parallelism: 1
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-code_quality-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install isort
            - run: pip install .[tf,torch,flax,quality]
            - save_cache:
                  key: v0.3-code_quality-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
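            # Style checks (black, isort, flake8) followed by the repository consistency scripts in utils/.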
            - run: black --check examples templates tests src utils
            - run: isort --check-only examples templates tests src utils
            - run: flake8 examples templates tests src utils
            - run: python utils/check_copies.py
            - run: python utils/check_dummies.py
            - run: python utils/check_repo.py
    check_repository_consistency:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        resource_class: small
        parallelism: 1
        steps:
            - checkout
            - run: pip install requests
            - run: python ./utils/link_tester.py

# TPU JOBS
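# run_examples_tpu installs Go and the GKE tooling, builds and pushes the test image,
# then deploys the training job to the cluster and streams its logs (via the anchors above).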
    run_examples_tpu:
        docker:
            - image: circleci/python:3.6
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - go/install
            - *checkout_ml_testing
            - gcp-gke/install
            - gcp-gke/update-kubeconfig-with-credentials:
                  cluster: $GKE_CLUSTER
                  perform-login: true
            - setup_remote_docker
            - *build_push_docker
            - *deploy_cluster
    cleanup-gke-jobs:
        docker:
            - image: circleci/python:3.6
        steps:
            - gcp-gke/install
            - gcp-gke/update-kubeconfig-with-credentials:
                  cluster: $GKE_CLUSTER
                  perform-login: true
            - *delete_gke_jobs
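# Reusable filter: jobs tagged with *workflow_filters below only run on the master branch.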
workflow_filters: &workflow_filters
    filters:
        branches:
            only:
                - master
workflows:
    version: 2
    build_and_test:
        jobs:
            - check_code_quality
            - check_repository_consistency
            - run_examples_torch
            - run_tests_custom_tokenizers
            - run_tests_torch_and_tf
            - run_tests_torch
            - run_tests_tf
            - run_tests_flax
            - run_tests_pipelines_torch
            - run_tests_pipelines_tf
            - build_doc
            - deploy_doc: *workflow_filters
    tpu_testing_jobs:
        triggers:
            - schedule:
                # Runs once a day at 08:00 UTC.
                cron: "0 8 * * *"
                filters:
                    branches:
                        only:
                            - master
        jobs:
            - cleanup-gke-jobs
            - run_examples_tpu