config.yml 12.8 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
version: 2.1
orbs:
    gcp-gke: circleci/gcp-gke@1.0.4
    go: circleci/go@1.3.0

# TPU REFERENCES
references:
    checkout_ml_testing: &checkout_ml_testing
        # Reusable step: clones GoogleCloudPlatform/ml-testing-accelerators into
        # ./ml-testing-accelerators and checks out one pinned commit (fetched into
        # a local branch named `stable`), so the checkout does not follow upstream HEAD.
        run:
            name: Checkout ml-testing-accelerators
            command: |
                git clone https://github.com/GoogleCloudPlatform/ml-testing-accelerators.git
                cd ml-testing-accelerators
                git fetch origin 5e88ac24f631c27045e62f0e8d5dfcf34e425e25:stable
                git checkout stable
    build_push_docker: &build_push_docker
        # Reusable step: builds the image defined in docker/transformers-pytorch-tpu
        # and pushes it to $GCR_IMAGE_PATH, tagged with the CircleCI workflow job id.
        run:
            name: Configure Docker
            command: |
                gcloud --quiet auth configure-docker
                cd docker/transformers-pytorch-tpu
                # `docker build` needs the build context as its final argument, so
                # both branches pass `.` (the non-PR branch previously omitted it).
                # PR builds additionally pass GITHUB_REF pointing at the PR head.
                if [ -z "$CIRCLE_PR_NUMBER" ]; then
                    docker build --tag "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID" -f Dockerfile --build-arg "TEST_IMAGE=1" .
                else
                    docker build --tag "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID" -f Dockerfile --build-arg "TEST_IMAGE=1" --build-arg "GITHUB_REF=pull/$CIRCLE_PR_NUMBER/head" .
                fi
                docker push "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID"
    deploy_cluster: &deploy_cluster
        # Reusable step: submits the TPU test job to the Kubernetes cluster and waits for it.
        #   1. Installs jsonnet with `go get` and adds $HOME/go/bin to PATH.
        #   2. Creates the dataset resource; a failure here is ignored (`|| true`).
        #   3. Renders bert-base-cased.jsonnet with the image path/tag and pipes it to
        #      `kubectl create`; the job name is recovered from kubectl's output by
        #      stripping the `job.batch/` prefix and the ` created` suffix.
        #   4. Polls the job up to max_checks times, 30s apart; status_code becomes
        #      1 on failure, 0 on success, and stays 2 if the job never finishes.
        #   5. Sleeps 60s, prints the job's container logs via `gcloud logging read`,
        #      deletes the pushed image, and exits with status_code.
        # Every command is chained with `&&`, so a failure in any earlier command
        # stops the chain before the image cleanup and the final `exit`.
        run:
            name: Deploy the job on the kubernetes cluster
            command: |
                go get github.com/google/go-jsonnet/cmd/jsonnet && \
                export PATH=$PATH:$HOME/go/bin && \
                kubectl create -f docker/transformers-pytorch-tpu/dataset.yaml || true && \
                job_name=$(jsonnet -J ml-testing-accelerators/ docker/transformers-pytorch-tpu/bert-base-cased.jsonnet --ext-str image=$GCR_IMAGE_PATH --ext-str image-tag=$CIRCLE_WORKFLOW_JOB_ID | kubectl create -f -) && \
                job_name=${job_name#job.batch/} && \
                job_name=${job_name% created} && \
                echo "Waiting on kubernetes job: $job_name" && \
                i=0 && \
                # 30 checks spaced 30s apart = 900s total.
                max_checks=30 && \
                status_code=2 && \
                # Check on the job periodically. Set the status code depending on what
                # happened to the job in Kubernetes. If we try max_checks times and
                # still the job hasn't finished, give up and return the starting
                # non-zero status code.
                while [ $i -lt $max_checks ]; do ((i++)); if kubectl get jobs $job_name -o jsonpath='Failed:{.status.failed}' | grep "Failed:1"; then status_code=1 && break; elif kubectl get jobs $job_name -o jsonpath='Succeeded:{.status.succeeded}' | grep "Succeeded:1" ; then status_code=0 && break; else echo "Job not finished yet"; fi; sleep 30; done && \
                echo "Done waiting. Job status code: $status_code" && \
                # Allow time for logs to flush.
                sleep 60 && \
                echo "JOB_NAME: $job_name" && \
                gcloud logging read "resource.type=k8s_container resource.labels.project_id=$GOOGLE_PROJECT_ID resource.labels.location=$GOOGLE_COMPUTE_ZONE resource.labels.cluster_name=$GKE_CLUSTER resource.labels.namespace_name=default resource.labels.pod_name:$job_name" --limit 10000000 --order asc --format 'value(textPayload)' --project=$GOOGLE_PROJECT_ID && \
                echo "Done with log retrieval attempt." && \
                gcloud container images delete "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID" --force-delete-tags && \
                exit $status_code
    delete_gke_jobs: &delete_gke_jobs
        # Reusable step: lists and then deletes Kubernetes jobs whose fourth
        # `kubectl get job` column matches an hours/days age pattern.
        run:
            name: Delete GKE Jobs
            command: |
                # Match jobs whose age matches patterns like '1h' or '1d', i.e. any job
                # that has been around longer than 1hr. First print all columns for
                # matches, then execute the delete.
                kubectl get job | awk 'match($4,/[0-9]+[dh]/) {print $0}'
                stale_jobs=$(kubectl get job | awk 'match($4,/[0-9]+[dh]/) {print $1}')
                # `kubectl delete job` with no names is an error, so skip the delete
                # when nothing matched instead of failing the step.
                if [ -n "$stale_jobs" ]; then
                    # Intentionally unquoted: one argument per matched job name.
                    kubectl delete job $stale_jobs
                else
                    echo "No stale jobs to delete."
                fi




Julien Chaumond's avatar
Julien Chaumond committed
65
jobs:
Aymeric Augustin's avatar
Aymeric Augustin committed
66
    run_tests_torch_and_tf:
        # Runs ./tests/ with both the torch and tf-cpu extras installed, collects
        # coverage, and uploads it with codecov.
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-torch_and_tf-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[sklearn,tf-cpu,torch,testing]
            - run: pip install codecov pytest-cov
            # NOTE(review): this saves under the generic key that the other jobs list
            # as their restore fallback; the torch_and_tf-specific restore key above
            # is never written in this file. Presumably intentional - confirm.
            - save_cache:
                  key: v0.3-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: python -m pytest -n 8 --dist=loadfile -s ./tests/ --cov  | tee output.txt
            - run: codecov
            - store_artifacts:
                  path: ~/transformers/output.txt
                  destination: test_output.txt
Aymeric Augustin's avatar
Aymeric Augustin committed
92
    run_tests_torch:
        # Runs ./tests/ with only the torch extra installed (no TensorFlow).
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.7
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-torch-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[sklearn,torch,testing]
            - save_cache:
                  key: v0.3-torch-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: python -m pytest -n 8 --dist=loadfile -s ./tests/ | tee output.txt
            - store_artifacts:
                  path: ~/transformers/output.txt
                  destination: test_output.txt
Aymeric Augustin's avatar
Aymeric Augustin committed
116
    run_tests_tf:
        # Runs ./tests/ with only the tf-cpu extra installed (no PyTorch).
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.7
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-tf-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[sklearn,tf-cpu,testing]
            - save_cache:
                  key: v0.3-tf-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: python -m pytest -n 8 --dist=loadfile -s ./tests/ | tee output.txt
            - store_artifacts:
                  path: ~/transformers/output.txt
                  destination: test_output.txt
Aymeric Augustin's avatar
Aymeric Augustin committed
140
    run_tests_custom_tokenizers:
        # Runs the Japanese BERT tokenizer tests with the `ja` extra installed.
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        environment:
            # Quoted so the value stays the literal string "yes"; a bare `yes` is a
            # boolean under YAML 1.1 parsers.
            # NOTE(review): assumes the test suite accepts "yes" for this flag - confirm.
            RUN_CUSTOM_TOKENIZERS: "yes"
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-custom_tokenizers-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[ja,testing]
            - save_cache:
                  key: v0.3-custom_tokenizers-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: python -m pytest -s ./tests/test_tokenization_bert_japanese.py | tee output.txt
            - store_artifacts:
                  path: ~/transformers/output.txt
                  destination: test_output.txt
Aymeric Augustin's avatar
Aymeric Augustin committed
162
    run_examples_torch:
        # Runs the tests under ./examples/ with the torch extra and the examples'
        # own requirements installed.
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-torch_examples-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[sklearn,torch,testing]
            - run: pip install -r examples/requirements.txt
            - save_cache:
                  key: v0.3-torch_examples-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: python -m pytest -n 8 --dist=loadfile -rA -s ./examples/ | tee output.txt
            - store_artifacts:
                  path: ~/transformers/output.txt
                  destination: test_output.txt
187
188
189
190
191
192
    build_doc:
        # Builds the HTML documentation; SPHINXOPTS="-W" is passed to the docs
        # Makefile. The built output under ./docs/_build is stored as an artifact.
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-build_doc-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            - run: pip install .[tf,torch,docs]
            - save_cache:
                  key: v0.3-build_doc-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: cd docs && make html SPHINXOPTS="-W"
            - store_artifacts:
                  path: ./docs/_build
LysandreJik's avatar
LysandreJik committed
206
    deploy_doc:
        # Installs the docs dependencies and runs ./.circleci/deploy.sh with the
        # SSH key identified by the fingerprint below added to the job.
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        steps:
            - add_ssh_keys:
                  fingerprints:
                      - "5b:7a:95:18:07:8c:aa:76:4c:60:35:88:ad:60:56:71"
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-deploy_doc-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install .[tf,torch,docs]
            - save_cache:
                  key: v0.3-deploy_doc-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: ./.circleci/deploy.sh
Aymeric Augustin's avatar
Aymeric Augustin committed
225
226
227
228
    check_code_quality:
        # Runs the formatting and lint checks (black, isort, flake8) over
        # examples, templates, tests, src and utils.
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        resource_class: medium
        parallelism: 1
        steps:
            - checkout
            - restore_cache:
                  keys:
                      - v0.3-code_quality-{{ checksum "setup.py" }}
                      - v0.3-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
            # we need a version of isort with https://github.com/timothycrosley/isort/pull/1000
            # Installed over https: GitHub no longer serves the unauthenticated git:// protocol.
            - run: pip install git+https://github.com/timothycrosley/isort.git@e63ae06ec7d70b06df9e528357650281a3d3ec22#egg=isort
            - run: pip install .[tf,torch,quality]
            - save_cache:
                  key: v0.3-code_quality-{{ checksum "setup.py" }}
                  paths:
                      - '~/.cache/pip'
            - run: black --check --line-length 119 --target-version py35 examples templates tests src utils
            - run: isort --check-only --recursive examples templates tests src utils
            - run: flake8 examples templates tests src utils
248
    check_repository_consistency:
        # Installs `requests` and runs utils/link_tester.py.
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.6
        resource_class: small
        parallelism: 1
        steps:
            - checkout
            - run: pip install requests
            - run: python ./utils/link_tester.py
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286

# TPU JOBS
    # Builds and pushes the TPU test image, then runs it as a job on the GKE
    # cluster named by $GKE_CLUSTER. The aliased steps (*checkout_ml_testing,
    # *build_push_docker, *deploy_cluster) are defined under `references`.
    run_examples_tpu:
        docker:
            - image: circleci/python:3.6
        environment:
            OMP_NUM_THREADS: 1
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            # Go is installed because the deploy step installs jsonnet with `go get`.
            - go/install
            - *checkout_ml_testing
            - gcp-gke/install
            - gcp-gke/update-kubeconfig-with-credentials:
                  cluster: $GKE_CLUSTER
                  perform-login: true
            # Required before the docker build/push commands in the next step.
            - setup_remote_docker
            - *build_push_docker
            - *deploy_cluster
    # Connects to the GKE cluster named by $GKE_CLUSTER and runs the
    # *delete_gke_jobs step defined under `references`.
    cleanup-gke-jobs:
        docker:
            - image: circleci/python:3.6
        steps:
            - gcp-gke/install
            - gcp-gke/update-kubeconfig-with-credentials:
                  cluster: $GKE_CLUSTER
                  perform-login: true
            - *delete_gke_jobs
LysandreJik's avatar
LysandreJik committed
287
288
289
290
# Shared job filter: restricts a workflow job to the master branch.
workflow_filters: &workflow_filters
    filters:
        branches:
            only:
                - master
292
workflows:
    version: 2
    # No `requires` entries, so the jobs do not depend on one another.
    # deploy_doc is the only job restricted to master (via *workflow_filters).
    build_and_test:
        jobs:
            - check_code_quality
            - check_repository_consistency
            - run_examples_torch
            - run_tests_custom_tokenizers
            - run_tests_torch_and_tf
            - run_tests_torch
            - run_tests_tf
            - build_doc
            - deploy_doc: *workflow_filters
305
306
307
308
309
310
311
312
313
314
315
316
    # Scheduled workflow for the TPU jobs; only triggered for the master branch.
    tpu_testing_jobs:
        triggers:
            - schedule:
                # Runs once a day at 08:00 (minute 0, hour 8); CircleCI evaluates
                # schedule cron expressions in UTC.
                cron: "0 8 * * *"
                filters:
                    branches:
                        only:
                            - master
        jobs:
            - cleanup-gke-jobs
            - run_examples_tpu