config.yml 16.1 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# CircleCI configuration schema version.
version: 2.1

# Orbs that supply the `go/install` and `gcp-gke/*` steps used by the TPU jobs.
orbs:
    gcp-gke: "circleci/gcp-gke@1.0.4"
    go: "circleci/go@1.3.0"

# TPU REFERENCES
references:
    # Reusable step: clone ml-testing-accelerators and pin it to a fixed commit.
    checkout_ml_testing: &checkout_ml_testing
        run:
            name: "Checkout ml-testing-accelerators"
            command: |
                git clone https://github.com/GoogleCloudPlatform/ml-testing-accelerators.git
                cd ml-testing-accelerators
                git fetch origin 5e88ac24f631c27045e62f0e8d5dfcf34e425e25:stable
                git checkout stable
    # Reusable step: build the TPU test image and push it to the registry at
    # $GCR_IMAGE_PATH, tagged with the current workflow job id.
    build_push_docker: &build_push_docker
        run:
            name: Configure Docker
            command: |
                gcloud --quiet auth configure-docker
                cd docker/transformers-pytorch-tpu
                # For pull-request builds, additionally pass the PR head ref to the
                # Dockerfile through the GITHUB_REF build argument.
                if [ -z "$CIRCLE_PR_NUMBER" ]; then
                    docker build --tag "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID" -f Dockerfile --build-arg "TEST_IMAGE=1" .
                else
                    docker build --tag "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID" -f Dockerfile --build-arg "TEST_IMAGE=1" --build-arg "GITHUB_REF=pull/$CIRCLE_PR_NUMBER/head" .
                fi
                docker push "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID"
    # Reusable step: render the jsonnet job definition, create it on the cluster,
    # poll until it succeeds or fails, stream the pod logs, delete the test image,
    # and exit with a status reflecting the Kubernetes job outcome.
    deploy_cluster: &deploy_cluster
        run:
            name: Deploy the job on the kubernetes cluster
            command: |
                # Start from a failing status. If the `&&` chain below stops early,
                # the final `exit $status_code` would otherwise expand to a bare
                # `exit` and report the status of the image deletion instead.
                status_code=2
                go get github.com/google/go-jsonnet/cmd/jsonnet && \
                export PATH=$PATH:$HOME/go/bin && \
                kubectl create -f docker/transformers-pytorch-tpu/dataset.yaml || true && \
                job_name=$(jsonnet -J ml-testing-accelerators/ docker/transformers-pytorch-tpu/bert-base-cased.jsonnet --ext-str image=$GCR_IMAGE_PATH --ext-str image-tag=$CIRCLE_WORKFLOW_JOB_ID | kubectl create -f -) && \
                job_name=${job_name#job.batch/} && \
                job_name=${job_name% created} && \
                echo "Waiting on kubernetes job: $job_name" && \
                i=0 && \
                # 30 checks spaced 30s apart = 900s total.
                max_checks=30 && \
                status_code=2 && \
                # Check on the job periodically. Set the status code depending on what
                # happened to the job in Kubernetes. If we try max_checks times and
                # still the job hasn't finished, give up and return the starting
                # non-zero status code.
                while [ $i -lt $max_checks ]; do ((i++)); if kubectl get jobs $job_name -o jsonpath='Failed:{.status.failed}' | grep "Failed:1"; then status_code=1 && break; elif kubectl get jobs $job_name -o jsonpath='Succeeded:{.status.succeeded}' | grep "Succeeded:1" ; then status_code=0 && break; else echo "Job not finished yet"; fi; sleep 30; done && \
                echo "Done waiting. Job status code: $status_code" && \
                pod_name=$(kubectl get po -l controller-uid=`kubectl get job $job_name -o "jsonpath={.metadata.labels.controller-uid}"` | awk 'match($0,!/NAME/) {print $1}') && \
                echo "GKE pod name: $pod_name" && \
                kubectl logs -f $pod_name --container=train
                echo "Done with log retrieval attempt." && \
                gcloud container images delete "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID" --force-delete-tags && \
                exit $status_code
    # Reusable step: delete Kubernetes jobs that have existed for an hour or more.
    delete_gke_jobs: &delete_gke_jobs
        run:
            name: Delete GKE Jobs
            command: |
                # Match jobs whose age (4th column) matches patterns like '1h' or
                # '1d', i.e. any job that has been around longer than 1hr.
                stale_jobs=$(kubectl get job | awk 'match($4,/[0-9]+[dh]/) {print $0}')
                if [ -n "$stale_jobs" ]; then
                    # First print all columns for the matches, then execute the delete.
                    echo "$stale_jobs"
                    kubectl delete job $(echo "$stale_jobs" | awk '{print $1}')
                else
                    # `kubectl delete job` without any name is an error, so skip
                    # the delete instead of failing the cleanup step.
                    echo "No GKE jobs older than one hour; nothing to delete."
                fi




Julien Chaumond's avatar
Julien Chaumond committed
64
jobs:
Aymeric Augustin's avatar
Aymeric Augustin committed
65
    # Runs the tests marked `is_pt_tf_cross_test` with both PyTorch and
    # TensorFlow (CPU) installed.
    run_tests_torch_and_tf:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-torch_and_tf-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install git+https://github.com/huggingface/datasets
            - run: pip install .[sklearn,tf-cpu,torch,testing]
            # Saved under the generic key that the other jobs list as their
            # fallback restore key.
            - save_cache:
                  key: v0.3-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            # The test output is duplicated into output.txt so it can be stored
            # as an artifact below.
            - run: RUN_PT_TF_CROSS_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s ./tests/ -m is_pt_tf_cross_test --durations=0 | tee output.txt
            - store_artifacts:
                  path: ~/transformers/output.txt
                  destination: test_output.txt
Aymeric Augustin's avatar
Aymeric Augustin committed
90
    # Runs the test suite with the PyTorch extras installed and stores both the
    # console output and the generated reports directory as artifacts.
    run_tests_torch:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.7
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-torch-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install git+https://github.com/huggingface/datasets
            - run: pip install .[sklearn,torch,testing]
            - save_cache:
                  key: v0.3-torch-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: python -m pytest -n 8 --dist=loadfile -s --make_reports=tests ./tests/ | tee tests_output.txt
            - store_artifacts:
                  path: ~/transformers/tests_output.txt
            - store_artifacts:
                  path: ~/transformers/reports
                  
Aymeric Augustin's avatar
Aymeric Augustin committed
117
    # Runs the test suite with the TensorFlow (CPU) extras installed.
    run_tests_tf:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.7
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-tf-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install git+https://github.com/huggingface/datasets
            - run: pip install .[sklearn,tf-cpu,testing]
            - save_cache:
                  key: v0.3-tf-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            # The test output is duplicated into output.txt so it can be stored
            # as an artifact below.
            - run: python -m pytest -n 8 --dist=loadfile -rA -s ./tests/ | tee output.txt
            - store_artifacts:
                  path: ~/transformers/output.txt
                  destination: test_output.txt
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
    # Runs the test suite with the Flax and PyTorch extras installed.
    run_tests_flax:
        working_directory: ~/transformers
        docker:
            - image: "circleci/python:3.7"
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - 'v0.3-flax-{{ checksum "setup.py" }}'
                      - 'v0.3-{{ checksum "setup.py" }}'
            - run: "pip install --upgrade pip"
            - run: "pip install git+https://github.com/huggingface/datasets"
            # NOTE(review): unlike the sibling jobs, this install runs under sudo;
            # confirm whether that is still required.
            - run: "sudo pip install .[flax,sklearn,torch,testing]"
            - save_cache:
                  key: 'v0.3-flax-{{ checksum "setup.py" }}'
                  paths:
                      - "~/.cache/pip"
            # Folded scalar: the two lines below form one single-line command.
            - run: >-
                  python -m pytest -n 8 --dist=loadfile -rA -s ./tests/
                  | tee output.txt
            - store_artifacts:
                  path: "~/transformers/output.txt"
                  destination: "test_output.txt"
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
    # Runs the tests marked `is_pipeline_test` with the PyTorch extras installed.
    run_tests_pipelines_torch:
        working_directory: ~/transformers
        docker:
            - image: "circleci/python:3.7"
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - 'v0.3-torch-{{ checksum "setup.py" }}'
                      - 'v0.3-{{ checksum "setup.py" }}'
            - run: "pip install --upgrade pip"
            - run: "pip install git+https://github.com/huggingface/datasets"
            - run: "pip install .[sklearn,torch,testing]"
            - save_cache:
                  key: 'v0.3-torch-{{ checksum "setup.py" }}'
                  paths:
                      - "~/.cache/pip"
            # Folded scalar: the lines below form one single-line command.
            - run: >-
                  RUN_PIPELINE_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s
                  ./tests/ -m is_pipeline_test
                  | tee output.txt
            - store_artifacts:
                  path: "~/transformers/output.txt"
                  destination: "test_output.txt"
    # Runs the tests marked `is_pipeline_test` with the TensorFlow (CPU) extras
    # installed.
    run_tests_pipelines_tf:
        working_directory: ~/transformers
        docker:
            - image: "circleci/python:3.7"
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - 'v0.3-tf-{{ checksum "setup.py" }}'
                      - 'v0.3-{{ checksum "setup.py" }}'
            - run: "pip install --upgrade pip"
            - run: "pip install git+https://github.com/huggingface/datasets"
            - run: "pip install .[sklearn,tf-cpu,testing]"
            - save_cache:
                  key: 'v0.3-tf-{{ checksum "setup.py" }}'
                  paths:
                      - "~/.cache/pip"
            # Folded scalar: the lines below form one single-line command.
            - run: >-
                  RUN_PIPELINE_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s
                  ./tests/ -m is_pipeline_test
                  | tee output.txt
            - store_artifacts:
                  path: "~/transformers/output.txt"
                  destination: "test_output.txt"
Aymeric Augustin's avatar
Aymeric Augustin committed
217
    # Runs the Japanese BERT tokenizer tests, which need the `ja` extras and the
    # unidic dictionary download.
    run_tests_custom_tokenizers:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        environment:
            # Quoted so that YAML 1.1 parsers keep the literal string instead of
            # coercing the bare word to a boolean.
            RUN_CUSTOM_TOKENIZERS: "yes"
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-custom_tokenizers-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[ja,testing]
            - run: python -m unidic download
            - save_cache:
                  key: v0.3-custom_tokenizers-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: python -m pytest -s ./tests/test_tokenization_bert_japanese.py | tee output.txt
            - store_artifacts:
                  path: ~/transformers/output.txt
                  destination: test_output.txt
Aymeric Augustin's avatar
Aymeric Augustin committed
240
    # Runs the tests under ./examples/ with the PyTorch extras plus the example
    # requirements installed; stores console output and reports as artifacts.
    run_examples_torch:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-torch_examples-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[sklearn,torch,testing]
            - run: pip install -r examples/requirements.txt
            - save_cache:
                  key: v0.3-torch_examples-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: python -m pytest -n 8 --dist=loadfile -s --make_reports=examples ./examples/ | tee examples_output.txt
            - store_artifacts:
                  path: ~/transformers/examples_output.txt
            - store_artifacts:
                  path: ~/transformers/reports
266
267
268
269
270
271
    # Builds the HTML documentation and stores the build directory as an
    # artifact.
    build_doc:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-build_doc-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[tf,torch,sentencepiece,docs]
            - save_cache:
                  key: v0.3-build_doc-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: cd docs && make html SPHINXOPTS="-W"
            - store_artifacts:
                  path: ./docs/_build
LysandreJik's avatar
LysandreJik committed
285
    # Installs the documentation dependencies and runs the deploy script, with
    # the listed SSH key added to the job.
    deploy_doc:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        steps:
            - add_ssh_keys:
                  fingerprints:
                      - "5b:7a:95:18:07:8c:aa:76:4c:60:35:88:ad:60:56:71"
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-deploy_doc-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install .[tf,torch,sentencepiece,docs]
            - save_cache:
                  key: v0.3-deploy_doc-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: ./.circleci/deploy.sh
Aymeric Augustin's avatar
Aymeric Augustin committed
304
305
306
307
    # Runs the formatters and linters in check mode, followed by the repository
    # check scripts under utils/.
    check_code_quality:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        resource_class: medium
        parallelism: 1
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-code_quality-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install isort
            - run: pip install .[tf,torch,flax,quality]
            - save_cache:
                  key: v0.3-code_quality-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: black --check examples templates tests src utils
            - run: isort --check-only examples templates tests src utils
            - run: flake8 examples templates tests src utils
            - run: python utils/style_doc.py src/transformers docs/source --max_len 119 --check_only
            - run: python utils/check_copies.py
            - run: python utils/check_dummies.py
            - run: python utils/check_repo.py
330
    # Installs `requests` and runs the link tester script.
    check_repository_consistency:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        resource_class: small
        parallelism: 1
        steps:
            - checkout
            - run: pip install requests
            - run: python ./utils/link_tester.py
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368

# TPU JOBS
    # Builds and pushes the TPU test image, then deploys the test job on the
    # GKE cluster named by $GKE_CLUSTER and waits for its result.
    run_examples_tpu:
        docker:
            - image: "circleci/python:3.6"
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - go/install
            - *checkout_ml_testing
            - gcp-gke/install
            - gcp-gke/update-kubeconfig-with-credentials:
                  cluster: "$GKE_CLUSTER"
                  perform-login: true
            # A remote Docker environment is set up before the image build step.
            - setup_remote_docker
            - *build_push_docker
            - *deploy_cluster
    # Connects to the GKE cluster named by $GKE_CLUSTER and deletes old jobs.
    cleanup-gke-jobs:
        docker:
            - image: "circleci/python:3.6"
        steps:
            - gcp-gke/install
            - gcp-gke/update-kubeconfig-with-credentials:
                  cluster: "$GKE_CLUSTER"
                  perform-login: true
            - *delete_gke_jobs
LysandreJik's avatar
LysandreJik committed
369
370
371
372
# Shared filter that restricts a workflow job to the master branch.
workflow_filters: &workflow_filters
    filters:
        branches:
            only:
                - master
374
workflows:
LysandreJik's avatar
LysandreJik committed
375
376
377
    version: 2
    # Main workflow: quality checks, test jobs and the documentation build.
    # deploy_doc is limited to the master branch through the shared filter.
    build_and_test:
        jobs:
            - check_code_quality
            - check_repository_consistency
            - run_examples_torch
            - run_tests_custom_tokenizers
            - run_tests_torch_and_tf
            - run_tests_torch
            - run_tests_tf
            - run_tests_flax
            - run_tests_pipelines_torch
            - run_tests_pipelines_tf
            - build_doc
            - deploy_doc: *workflow_filters
Lysandre's avatar
Lysandre committed
390
391
392
393
394
395
396
397
398
399
400
401
    # Scheduled workflow: cleans up old GKE jobs and runs the TPU examples on
    # the master branch.
    tpu_testing_jobs:
        triggers:
            - schedule:
                  # Runs once a day at 08:00 (minute 0 of hour 8); CircleCI
                  # evaluates schedule cron expressions in UTC.
                  cron: "0 8 * * *"
                  filters:
                      branches:
                          only:
                              - master
        jobs:
            - cleanup-gke-jobs
            - run_examples_tpu