"app/dialog/git@developer.sourcefind.cn:OpenDAS/ollama.git" did not exist on "95fdd8d619ad4dc9215cdce8a8665284a96cd96f"
Unverified commit 58f672e6, authored by Lysandre Debut and committed by GitHub

Tests run on Docker (#10681)



* Tests run on Docker
Co-authored-by: Morgan <funtowiczmo@gmail.com>

* Comments from code review

* Reply to itself

* Dependencies
Co-authored-by: Morgan <funtowiczmo@gmail.com>
parent d41dd535
@@ -10,73 +10,42 @@ on:
       - "tests/**"
       - ".github/**"
       - "templates/**"
-  # pull_request:
   repository_dispatch:

 jobs:
   run_tests_torch_gpu:
-    runs-on: [self-hosted, gpu, single-gpu]
+    runs-on: [self-hosted, docker-gpu, single-gpu]
+    container:
+      image: pytorch/pytorch:1.8.0-cuda11.1-cudnn8-runtime
+      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
     steps:
-    - uses: actions/checkout@v2
-    - name: Python version
-      run: |
-        which python
-        python --version
-        pip --version
-    - name: Current dir
-      run: pwd
-    - run: nvidia-smi
-    - name: Kill any run-away pytest processes
-      run: (pkill -f tests; pkill -f examples) || echo "no zombies"
-    - name: Loading cache.
-      uses: actions/cache@v2
-      id: cache
-      with:
-        path: .env
-        key: v1.2-tests_torch_gpu-${{ hashFiles('setup.py') }}
-    - name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
+    - name: Launcher docker
+      uses: actions/checkout@v2
+    - name: NVIDIA-SMI
       run: |
-        python -m venv .env
-        source .env/bin/activate
-        which python
-        python --version
-        pip --version
+        nvidia-smi
     - name: Install dependencies
       run: |
-        source .env/bin/activate
-        sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev
+        apt -y update && apt install -y libsndfile1-dev
         pip install --upgrade pip
-        pip install .[torch,sklearn,testing,onnxruntime,sentencepiece,speech]
-        pip install git+https://github.com/huggingface/datasets
+        pip install .[sklearn,testing,onnxruntime,sentencepiece,speech]
     - name: Are GPUs recognized by our DL frameworks
       run: |
-        source .env/bin/activate
         python -c "import torch; print('Cuda available:', torch.cuda.is_available())"
+        python -c "import torch; print('Cuda version:', torch.version.cuda)"
+        python -c "import torch; print('CuDNN version:', torch.backends.cudnn.version())"
         python -c "import torch; print('Number of GPUs available:', torch.cuda.device_count())"
-    # - name: Create model files
-    #   run: |
-    #     source .env/bin/activate
-    #     transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/encoder-bert-tokenizer.json --path=templates/adding_a_new_model
-    #     transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/pt-encoder-bert-tokenizer.json --path=templates/adding_a_new_model
-    #     transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/standalone.json --path=templates/adding_a_new_model
-    #     transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/tf-encoder-bert-tokenizer.json --path=templates/adding_a_new_model
     - name: Run all non-slow tests on GPU
       env:
-        OMP_NUM_THREADS: 1
-        CUDA_VISIBLE_DEVICES: 0
+        OMP_NUM_THREADS: 8
+        MKL_NUM_THREADS: 8
+        HF_HOME: /mnt/cache
       run: |
-        source .env/bin/activate
-        python -m pytest -n 2 --dist=loadfile -s --make-reports=tests_torch_gpu tests
+        python -m pytest -n 2 --dist=loadfile --make-reports=tests_torch_gpu tests
     - name: Failure short reports
       if: ${{ always() }}
@@ -89,68 +58,38 @@ jobs:
         name: run_all_tests_torch_gpu_test_reports
         path: reports

   run_tests_tf_gpu:
-    runs-on: [self-hosted, gpu, single-gpu]
+    runs-on: [self-hosted, docker-gpu, single-gpu]
+    container:
+      image: tensorflow/tensorflow:2.4.1-gpu
+      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
     steps:
-    - uses: actions/checkout@v2
-    - name: Python version
-      run: |
-        which python
-        python --version
-        pip --version
-    - name: Current dir
-      run: pwd
-    - run: nvidia-smi
-    - name: Kill any run-away pytest processes
-      run: (pkill -f tests; pkill -f examples) || echo "no zombies"
-    - name: Loading cache.
-      uses: actions/cache@v2
-      id: cache
-      with:
-        path: .env
-        key: v1.2-tests_tf_gpu-${{ hashFiles('setup.py') }}
-    - name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
+    - name: Launcher docker
+      uses: actions/checkout@v2
+    - name: NVIDIA-SMI
       run: |
-        python -m venv .env
-        source .env/bin/activate
-        which python
-        python --version
-        pip --version
+        nvidia-smi
     - name: Install dependencies
       run: |
-        source .env/bin/activate
         pip install --upgrade pip
-        pip install .[tf,sklearn,testing,onnxruntime,sentencepiece]
-        pip install git+https://github.com/huggingface/datasets
+        pip install .[sklearn,testing,onnxruntime,sentencepiece]
     - name: Are GPUs recognized by our DL frameworks
       run: |
-        source .env/bin/activate
         TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
         TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
-    - name: Create model files
-      run: |
-        source .env/bin/activate
-        # transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/encoder-bert-tokenizer.json --path=templates/adding_a_new_model
-        # transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/pt-encoder-bert-tokenizer.json --path=templates/adding_a_new_model
-        # transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/standalone.json --path=templates/adding_a_new_model
-        # transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/tf-encoder-bert-tokenizer.json --path=templates/adding_a_new_model
     - name: Run all non-slow tests on GPU
       env:
-        OMP_NUM_THREADS: 1
-        CUDA_VISIBLE_DEVICES: 0
+        OMP_NUM_THREADS: 8
+        MKL_NUM_THREADS: 8
+        TF_NUM_INTRAOP_THREADS: 8
+        TF_NUM_INTEROP_THREADS: 1
+        HF_HOME: /mnt/cache
       run: |
-        source .env/bin/activate
-        python -m pytest -n 2 --dist=loadfile -s --make-reports=tests_tf_gpu tests
+        python -m pytest -n 2 --dist=loadfile --make-reports=tests_tf_gpu tests
     - name: Failure short reports
       if: ${{ always() }}
@@ -163,58 +102,41 @@ jobs:
         name: run_all_tests_tf_gpu_test_reports
         path: reports

   run_tests_torch_multi_gpu:
-    runs-on: [self-hosted, gpu, multi-gpu]
+    runs-on: [self-hosted, docker-gpu, multi-gpu]
+    container:
+      image: pytorch/pytorch:1.8.0-cuda11.1-cudnn8-runtime
+      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
     steps:
-    - uses: actions/checkout@v2
-    - name: Python version
-      run: |
-        which python
-        python --version
-        pip --version
-    - name: Current dir
-      run: pwd
-    - run: nvidia-smi
-    - name: Kill any run-away pytest processes
-      run: (pkill -f tests; pkill -f examples) || echo "no zombies"
-    - name: Loading cache.
-      uses: actions/cache@v2
-      id: cache
-      with:
-        path: .env
-        key: v1.2-tests_torch_multi_gpu-${{ hashFiles('setup.py') }}
-    - name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
+    - name: Launcher docker
+      uses: actions/checkout@v2
+    - name: NVIDIA-SMI
       run: |
-        python -m venv .env
-        source .env/bin/activate
-        which python
-        python --version
-        pip --version
+        nvidia-smi
     - name: Install dependencies
       run: |
-        source .env/bin/activate
-        sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev
+        apt -y update && apt install -y libsndfile1-dev
         pip install --upgrade pip
-        pip install .[torch,sklearn,testing,onnxruntime,sentencepiece,speech]
-        pip install git+https://github.com/huggingface/datasets
+        pip install .[sklearn,testing,onnxruntime,sentencepiece,speech]
     - name: Are GPUs recognized by our DL frameworks
       run: |
-        source .env/bin/activate
         python -c "import torch; print('Cuda available:', torch.cuda.is_available())"
+        python -c "import torch; print('Cuda version:', torch.version.cuda)"
+        python -c "import torch; print('CuDNN version:', torch.backends.cudnn.version())"
         python -c "import torch; print('Number of GPUs available:', torch.cuda.device_count())"
     - name: Run all non-slow tests on GPU
       env:
-        OMP_NUM_THREADS: 1
+        OMP_NUM_THREADS: 8
+        MKL_NUM_THREADS: 8
+        MKL_SERVICE_FORCE_INTEL: 1
+        HF_HOME: /mnt/cache
       run: |
-        source .env/bin/activate
-        python -m pytest -n 2 --dist=loadfile -s --make-reports=tests_torch_multi_gpu tests
+        python -m pytest -n 2 --dist=loadfile --make-reports=tests_torch_multi_gpu tests
     - name: Failure short reports
       if: ${{ always() }}
@@ -228,56 +150,37 @@ jobs:
         path: reports

   run_tests_tf_multi_gpu:
-    runs-on: [self-hosted, gpu, multi-gpu]
+    runs-on: [self-hosted, docker-gpu, multi-gpu]
+    container:
+      image: tensorflow/tensorflow:2.4.1-gpu
+      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
     steps:
-    - uses: actions/checkout@v2
-    - name: Python version
-      run: |
-        which python
-        python --version
-        pip --version
-    - name: Current dir
-      run: pwd
-    - run: nvidia-smi
-    - name: Kill any run-away pytest processes
-      run: (pkill -f tests; pkill -f examples) || echo "no zombies"
-    - name: Loading cache.
-      uses: actions/cache@v2
-      id: cache
-      with:
-        path: .env
-        key: v1.2-tests_tf_multi_gpu-${{ hashFiles('setup.py') }}
-    - name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
+    - name: Launcher docker
+      uses: actions/checkout@v2
+    - name: NVIDIA-SMI
       run: |
-        python -m venv .env
-        source .env/bin/activate
-        which python
-        python --version
-        pip --version
+        nvidia-smi
     - name: Install dependencies
       run: |
-        source .env/bin/activate
         pip install --upgrade pip
-        pip install .[tf,sklearn,testing,onnxruntime,sentencepiece]
-        pip install git+https://github.com/huggingface/datasets
+        pip install .[sklearn,testing,onnxruntime,sentencepiece]
     - name: Are GPUs recognized by our DL frameworks
       run: |
-        source .env/bin/activate
         TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
         TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
     - name: Run all non-slow tests on GPU
       env:
-        OMP_NUM_THREADS: 1
+        OMP_NUM_THREADS: 8
+        MKL_NUM_THREADS: 8
+        TF_NUM_INTRAOP_THREADS: 8
+        TF_NUM_INTEROP_THREADS: 1
+        HF_HOME: /mnt/cache
       run: |
-        source .env/bin/activate
-        python -m pytest -n 2 --dist=loadfile -s --make-reports=tests_tf_multi_gpu tests
+        python -m pytest -n 2 --dist=loadfile --make-reports=tests_tf_multi_gpu tests
     - name: Failure short reports
       if: ${{ always() }}
@@ -289,3 +192,22 @@ jobs:
       with:
         name: run_all_tests_tf_multi_gpu_test_reports
         path: reports
+
+  send_results:
+    name: Send results to webhook
+    runs-on: ubuntu-latest
+    if: always()
+    needs: [run_tests_torch_gpu, run_tests_tf_gpu, run_tests_torch_multi_gpu, run_tests_tf_multi_gpu]
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/download-artifact@v2
+      - name: Send message to Slack
+        env:
+          CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
+          CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
+        run: |
+          pip install slack_sdk
+          python utils/notification_service.py push
\ No newline at end of file
-# configuration notes:
-#
-# - `source .env/bin/activate` is currently needed to be run first thing first in each step. Otherwise
-#   the step uses the system-wide python interpreter.
 name: Self-hosted runner (scheduled)

 on:
@@ -15,61 +10,39 @@ on:

 jobs:
   run_all_tests_torch_gpu:
-    runs-on: [self-hosted, gpu, single-gpu]
+    runs-on: [self-hosted, docker-gpu, single-gpu]
+    container:
+      image: pytorch/pytorch:1.8.0-cuda11.1-cudnn8-runtime
+      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
     steps:
-    - uses: actions/checkout@v2
-    - name: Loading cache.
-      uses: actions/cache@v2
-      id: cache
-      with:
-        path: .env
-        key: v1.2-slow_tests_torch_gpu-${{ hashFiles('setup.py') }}
-    - name: Python version
+    - name: Launcher docker
+      uses: actions/checkout@v2
+    - name: NVIDIA-SMI
       run: |
-        which python
-        python --version
-        pip --version
-    - name: Current dir
-      run: pwd
-    - run: nvidia-smi
-    - name: Kill any run-away pytest processes
-      run: (pkill -f tests; pkill -f examples) || echo "no zombies"
-    - name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
-      if: steps.cache.outputs.cache-hit != 'true'
-      run: |
-        python -m venv .env
-        source .env/bin/activate
-        which python
-        python --version
-        pip --version
+        nvidia-smi
     - name: Install dependencies
       run: |
-        source .env/bin/activate
+        apt -y update && apt install -y libsndfile1-dev
         pip install --upgrade pip
-        pip install .[torch,sklearn,testing,onnxruntime,sentencepiece]
-        pip install git+https://github.com/huggingface/datasets
-        pip list
+        pip install .[sklearn,testing,onnxruntime,sentencepiece,speech]
     - name: Are GPUs recognized by our DL frameworks
       run: |
-        source .env/bin/activate
         python -c "import torch; print('Cuda available:', torch.cuda.is_available())"
+        python -c "import torch; print('Cuda version:', torch.version.cuda)"
+        python -c "import torch; print('CuDNN version:', torch.backends.cudnn.version())"
         python -c "import torch; print('Number of GPUs available:', torch.cuda.device_count())"
     - name: Run all tests on GPU
       env:
-        OMP_NUM_THREADS: 1
+        OMP_NUM_THREADS: 16
+        MKL_NUM_THREADS: 16
         RUN_SLOW: yes
+        HF_HOME: /mnt/cache
       run: |
-        source .env/bin/activate
-        python -m pytest -n 1 --dist=loadfile -s --make-reports=tests_torch_gpu tests
+        python -m pytest -n 1 --dist=loadfile --make-reports=tests_torch_gpu tests
     - name: Failure short reports
       if: ${{ always() }}
@@ -78,12 +51,13 @@ jobs:
     - name: Run examples tests on GPU
       if: ${{ always() }}
       env:
-        OMP_NUM_THREADS: 1
+        OMP_NUM_THREADS: 16
+        MKL_NUM_THREADS: 16
         RUN_SLOW: yes
+        HF_HOME: /mnt/cache
       run: |
-        source .env/bin/activate
         pip install -r examples/_tests_requirements.txt
-        python -m pytest -n 1 --dist=loadfile -s --make-reports=examples_torch_gpu examples
+        python -m pytest -n 1 --dist=loadfile --make-reports=examples_torch_gpu examples
     - name: Failure short reports
       if: ${{ always() }}
@@ -92,13 +66,13 @@ jobs:
     - name: Run all pipeline tests on GPU
       if: ${{ always() }}
       env:
-        TF_FORCE_GPU_ALLOW_GROWTH: "true"
-        OMP_NUM_THREADS: 1
+        OMP_NUM_THREADS: 16
+        MKL_NUM_THREADS: 16
         RUN_SLOW: yes
         RUN_PIPELINE_TESTS: yes
+        HF_HOME: /mnt/cache
       run: |
-        source .env/bin/activate
-        python -m pytest -n 1 --dist=loadfile -s -m is_pipeline_test --make-reports=tests_torch_pipeline_gpu tests
+        python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_torch_pipeline_gpu tests
     - name: Failure short reports
       if: ${{ always() }}
@@ -111,64 +85,39 @@ jobs:
         name: run_all_tests_torch_gpu_test_reports
         path: reports

   run_all_tests_tf_gpu:
-    runs-on: [self-hosted, gpu, single-gpu]
+    runs-on: [self-hosted, docker-gpu, single-gpu]
+    container:
+      image: tensorflow/tensorflow:2.4.1-gpu
+      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
     steps:
-    - uses: actions/checkout@v2
-    - name: Loading cache.
-      uses: actions/cache@v2
-      id: cache
-      with:
-        path: .env
-        key: v1.2-slow_tests_tf_gpu-${{ hashFiles('setup.py') }}
-    - name: Python version
+    - name: Launcher docker
+      uses: actions/checkout@v2
+    - name: NVIDIA-SMI
       run: |
-        which python
-        python --version
-        pip --version
-    - name: Current dir
-      run: pwd
-    - run: nvidia-smi
-    - name: Kill any run-away pytest processes
-      run: (pkill -f tests; pkill -f examples) || echo "no zombies"
-    - name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
-      if: steps.cache.outputs.cache-hit != 'true'
-      run: |
-        python -m venv .env
-        source .env/bin/activate
-        which python
-        python --version
-        pip --version
+        nvidia-smi
     - name: Install dependencies
       run: |
-        source .env/bin/activate
         pip install --upgrade pip
-        pip install .[tf,sklearn,testing,onnxruntime,sentencepiece]
-        pip install git+https://github.com/huggingface/datasets
-        pip list
+        pip install .[sklearn,testing,onnx,sentencepiece]
     - name: Are GPUs recognized by our DL frameworks
       run: |
-        source .env/bin/activate
         TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
         TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
     - name: Run all tests on GPU
       env:
-        OMP_NUM_THREADS: 1
         RUN_SLOW: yes
+        HF_HOME: /mnt/cache
+        OMP_NUM_THREADS: 16
+        TF_NUM_INTEROP_THREADS: 1
+        TF_NUM_INTRAOP_THREADS: 16
+        MKL_NUM_THREADS: 16
       run: |
-        source .env/bin/activate
-        python -m pytest -n 1 --dist=loadfile -s --make-reports=tests_tf_gpu tests
+        python -m pytest -n 1 --dist=loadfile --make-reports=tests_tf_gpu tests
     - name: Failure short reports
       if: ${{ always() }}
@@ -177,17 +126,19 @@ jobs:
     - name: Run all pipeline tests on GPU
       if: ${{ always() }}
       env:
-        TF_FORCE_GPU_ALLOW_GROWTH: "true"
-        OMP_NUM_THREADS: 1
         RUN_SLOW: yes
+        HF_HOME: /mnt/cache
+        OMP_NUM_THREADS: 16
         RUN_PIPELINE_TESTS: yes
+        TF_NUM_INTEROP_THREADS: 1
+        TF_NUM_INTRAOP_THREADS: 16
+        MKL_NUM_THREADS: 16
       run: |
-        source .env/bin/activate
-        python -m pytest -n 1 --dist=loadfile -s -m is_pipeline_test --make-reports=tests_tf_pipelines_gpu tests
+        python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_tf_pipeline_gpu tests
     - name: Failure short reports
       if: ${{ always() }}
-      run: cat reports/tests_tf_pipelines_gpu_failures_short.txt
+      run: cat reports/tests_tf_pipeline_gpu_failures_short.txt
     - name: Test suite reports artifacts
       if: ${{ always() }}
@@ -197,92 +148,55 @@ jobs:
         path: reports

   run_all_tests_torch_multi_gpu:
-    runs-on: [self-hosted, gpu, multi-gpu]
+    runs-on: [self-hosted, docker-gpu, multi-gpu]
+    container:
+      image: pytorch/pytorch:1.8.0-cuda11.1-cudnn8-runtime
+      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
     steps:
-    - uses: actions/checkout@v2
-    - name: Loading cache.
-      uses: actions/cache@v2
-      id: cache
-      with:
-        path: .env
-        key: v1.2-slow_tests_torch_multi_gpu-${{ hashFiles('setup.py') }}
-    - name: Python version
+    - name: Launcher docker
+      uses: actions/checkout@v2
+    - name: NVIDIA-SMI
       run: |
-        which python
-        python --version
-        pip --version
-    - name: Current dir
-      run: pwd
-    - run: nvidia-smi
-    - name: Kill any run-away pytest processes
-      run: (pkill -f tests; pkill -f examples) || echo "no zombies"
-    - name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
-      if: steps.cache.outputs.cache-hit != 'true'
-      run: |
-        python -m venv .env
-        source .env/bin/activate
-        which python
-        python --version
-        pip --version
+        nvidia-smi
     - name: Install dependencies
       run: |
-        source .env/bin/activate
+        apt -y update && apt install -y libsndfile1-dev
         pip install --upgrade pip
-        pip install .[torch,sklearn,testing,onnxruntime,sentencepiece]
-        pip install git+https://github.com/huggingface/datasets
-        pip install fairscale
-        pip install deepspeed
-        pip list
+        pip install .[sklearn,testing,onnxruntime,sentencepiece,speech]
     - name: Are GPUs recognized by our DL frameworks
       run: |
-        source .env/bin/activate
         python -c "import torch; print('Cuda available:', torch.cuda.is_available())"
+        python -c "import torch; print('Cuda version:', torch.version.cuda)"
+        python -c "import torch; print('CuDNN version:', torch.backends.cudnn.version())"
         python -c "import torch; print('Number of GPUs available:', torch.cuda.device_count())"
-    - name: Run all tests on multi-GPU
+    - name: Run all tests on GPU
       env:
-        OMP_NUM_THREADS: 1
         RUN_SLOW: yes
+        HF_HOME: /mnt/cache
+        OMP_NUM_THREADS: 16
+        MKL_NUM_THREADS: 16
+        MKL_SERVICE_FORCE_INTEL: 1
      run: |
-        source .env/bin/activate
-        python -m pytest -n 1 --dist=loadfile -s --make-reports=tests_torch_multi_gpu tests
+        python -m pytest -n 1 --dist=loadfile --make-reports=tests_torch_multi_gpu tests
     - name: Failure short reports
       if: ${{ always() }}
       run: cat reports/tests_torch_multi_gpu_failures_short.txt
-    - name: Run examples tests on multi-GPU
-      if: ${{ always() }}
-      env:
-        OMP_NUM_THREADS: 1
-        RUN_SLOW: yes
-      run: |
-        source .env/bin/activate
-        pip install -r examples/_tests_requirements.txt
-        python -m pytest -n 1 --dist=loadfile -s --make-reports=tests_torch_examples_multi_gpu examples
-    - name: Failure short reports
-      if: ${{ always() }}
-      run: cat reports/tests_torch_examples_multi_gpu_failures_short.txt
-    - name: Run all pipeline tests on multi-GPU
+    - name: Run all pipeline tests on GPU
       if: ${{ always() }}
       env:
-        TF_FORCE_GPU_ALLOW_GROWTH: "true"
-        OMP_NUM_THREADS: 1
+        OMP_NUM_THREADS: 16
+        MKL_NUM_THREADS: 16
         RUN_SLOW: yes
         RUN_PIPELINE_TESTS: yes
+        HF_HOME: /mnt/cache
       run: |
-        source .env/bin/activate
-        python -m pytest -n 1 --dist=loadfile -s -m is_pipeline_test --make-reports=tests_torch_pipeline_multi_gpu tests
+        python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_torch_pipeline_multi_gpu tests
     - name: Failure short reports
       if: ${{ always() }}
@@ -296,76 +210,55 @@ jobs:
         path: reports

   run_all_tests_tf_multi_gpu:
-    runs-on: [self-hosted, gpu, multi-gpu]
+    runs-on: [self-hosted, docker-gpu, multi-gpu]
+    container:
+      image: tensorflow/tensorflow:2.4.1-gpu
+      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
     steps:
-    - uses: actions/checkout@v2
-    - name: Loading cache.
-      uses: actions/cache@v2
-      id: cache
-      with:
-        path: .env
-        key: v1.2-slow_tests_tf_multi_gpu-${{ hashFiles('setup.py') }}
-    - name: Python version
-      run: |
-        which python
-        python --version
-        pip --version
-    - name: Current dir
-      run: pwd
-    - run: nvidia-smi
-    - name: Kill any run-away pytest processes
-      run: (pkill -f tests; pkill -f examples) || echo "no zombies"
-    - name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
-      if: steps.cache.outputs.cache-hit != 'true'
+    - name: Launcher docker
+      uses: actions/checkout@v2
+    - name: NVIDIA-SMI
       run: |
-        python -m venv .env
-        source .env/bin/activate
-        which python
-        python --version
-        pip --version
+        nvidia-smi
     - name: Install dependencies
       run: |
-        source .env/bin/activate
         pip install --upgrade pip
-        pip install .[tf,sklearn,testing,onnxruntime,sentencepiece]
-        pip install git+https://github.com/huggingface/datasets
-        pip list
+        pip install .[sklearn,testing,onnx,sentencepiece]
     - name: Are GPUs recognized by our DL frameworks
       run: |
-        source .env/bin/activate
         TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
         TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
-    - name: Run all tests on multi-GPU
+    - name: Run all tests on GPU
      env:
-        OMP_NUM_THREADS: 1
+        OMP_NUM_THREADS: 16
         RUN_SLOW: yes
+        MKL_NUM_THREADS: 16
+        TF_NUM_INTEROP_THREADS: 1
+        TF_NUM_INTRAOP_THREADS: 16
+        HF_HOME: /mnt/cache
       run: |
-        source .env/bin/activate
-        python -m pytest -n 1 --dist=loadfile -s --make-reports=tests_tf_multi_gpu tests
+        python -m pytest -n 1 --dist=loadfile --make-reports=tests_tf_multi_gpu tests
     - name: Failure short reports
       if: ${{ always() }}
       run: cat reports/tests_tf_multi_gpu_failures_short.txt
-    - name: Run all pipeline tests on multi-GPU
+    - name: Run all pipeline tests on GPU
       if: ${{ always() }}
       env:
-        TF_FORCE_GPU_ALLOW_GROWTH: "true"
-        OMP_NUM_THREADS: 1
+        OMP_NUM_THREADS: 16
         RUN_SLOW: yes
         RUN_PIPELINE_TESTS: yes
+        MKL_NUM_THREADS: 16
+        TF_NUM_INTEROP_THREADS: 1
+        TF_NUM_INTRAOP_THREADS: 16
+        HF_HOME: /mnt/cache
       run: |
-        source .env/bin/activate
-        python -m pytest -n 1 --dist=loadfile -s -m is_pipeline_test --make-reports=tests_tf_pipeline_multi_gpu tests
+        python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_tf_pipeline_multi_gpu tests
     - name: Failure short reports
       if: ${{ always() }}
@@ -377,3 +270,23 @@ jobs:
       with:
         name: run_all_tests_tf_multi_gpu_test_reports
         path: reports
+
+  send_results:
+    name: Send results to webhook
+    runs-on: ubuntu-latest
+    if: always()
+    needs: [run_all_tests_torch_gpu, run_all_tests_tf_gpu, run_all_tests_torch_multi_gpu, run_all_tests_tf_multi_gpu]
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/download-artifact@v2
+      - name: Send message to Slack
+        env:
+          CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
+          CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
+        run: |
+          pip install slack_sdk
+          python utils/notification_service.py scheduled
@@ -115,6 +115,7 @@ _deps = [
     "psutil",
     "pydantic",
     "pytest",
+    "pytest-sugar",
     "pytest-xdist",
     "python>=3.6.0",
     "recommonmark",
@@ -225,6 +226,7 @@ else:
 extras["tokenizers"] = deps_list("tokenizers")
 extras["onnxruntime"] = deps_list("onnxruntime", "onnxruntime-tools")
+extras["onnx"] = deps_list("onnxconverter-common", "keras2onnx") + extras["onnxruntime"]
 extras["modelcreation"] = deps_list("cookiecutter")
 extras["serving"] = deps_list("pydantic", "uvicorn", "fastapi", "starlette")
@@ -232,7 +234,7 @@ extras["speech"] = deps_list("soundfile", "torchaudio")
 extras["sentencepiece"] = deps_list("sentencepiece", "protobuf")
 extras["testing"] = (
-    deps_list("pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil", "datasets")
+    deps_list("pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil", "datasets", "pytest-sugar")
     + extras["retrieval"]
     + extras["modelcreation"]
 )
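
The `deps_list(...)` calls above resolve package names against the pinned `deps` table built from `_deps` (the same table mirrored in the dependency version hunk that follows). A minimal sketch, under the assumption that the helper simply looks names up in that table; the actual implementation in setup.py may differ:

    import re

    # Illustrative subset only; the real _deps list is much longer.
    _deps = ["psutil", "pytest", "pytest-sugar", "pytest-xdist", "python>=3.6.0"]

    # Map the bare package name to its full (possibly pinned) requirement string.
    deps = {re.match(r"^[^<>=!~;]+", d).group(0): d for d in _deps}

    def deps_list(*pkgs):
        # Look every requested package up in the pinned table.
        return [deps[pkg] for pkg in pkgs]

    print(deps_list("pytest", "pytest-sugar", "python"))
    # ['pytest', 'pytest-sugar', 'python>=3.6.0']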
@@ -28,6 +28,7 @@ deps = {
     "psutil": "psutil",
     "pydantic": "pydantic",
     "pytest": "pytest",
+    "pytest-sugar": "pytest-sugar",
     "pytest-xdist": "pytest-xdist",
     "python": "python>=3.6.0",
     "recommonmark": "recommonmark",
@@ -137,6 +137,17 @@ def slow(test_case):
     return test_case


+def tooslow(test_case):
+    """
+    Decorator marking a test as too slow.
+
+    Slow tests are skipped while they're in the process of being fixed. No test should stay tagged as "tooslow" as
+    these will not be tested by the CI.
+    """
+    return unittest.skip("test is too slow")(test_case)
+
+
 def custom_tokenizers(test_case):
     """
     Decorator marking a test for a custom tokenizer.
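
As a quick usage illustration (the test class and test body below are invented for the example, not part of this diff), `tooslow` is applied the same way as `slow`, but it skips the test unconditionally, even when `RUN_SLOW=yes`:

    import unittest

    from transformers.testing_utils import tooslow


    class DummyTFModelTest(unittest.TestCase):
        @tooslow
        def test_saved_model_creation(self):
            # Never executed: the decorator wraps the test in unittest.skip(...).
            self.fail("unreachable")


    if __name__ == "__main__":
        unittest.main()  # reports the test as skipped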
@@ -25,7 +25,14 @@ from importlib import import_module
 from typing import List, Tuple

 from transformers import is_tf_available
-from transformers.testing_utils import _tf_gpu_memory_limit, is_pt_tf_cross_test, require_onnx, require_tf, slow
+from transformers.testing_utils import (
+    _tf_gpu_memory_limit,
+    is_pt_tf_cross_test,
+    require_onnx,
+    require_tf,
+    slow,
+    tooslow,
+)


 if is_tf_available():
@@ -129,7 +136,7 @@ class TFModelTesterMixin:
         self.assert_outputs_same(after_outputs, outputs)

-    @slow
+    @tooslow
     def test_graph_mode(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
         for model_class in self.all_model_classes:
@@ -143,7 +150,7 @@ class TFModelTesterMixin:
             outputs = run_in_graph_mode()
             self.assertIsNotNone(outputs)

-    @slow
+    @tooslow
     def test_xla_mode(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
         for model_class in self.all_model_classes:
@@ -184,7 +191,7 @@ class TFModelTesterMixin:
             expected_arg_names = ["input_ids"]
             self.assertListEqual(arg_names[:1], expected_arg_names)

-    @slow
+    @tooslow
     def test_saved_model_creation(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
         config.output_hidden_states = False
@@ -205,7 +212,7 @@ class TFModelTesterMixin:
             saved_model_dir = os.path.join(tmpdirname, "saved_model", "1")
             self.assertTrue(os.path.exists(saved_model_dir))

-    @slow
+    @tooslow
     def test_saved_model_creation_extended(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
         config.output_hidden_states = True
@@ -314,7 +321,7 @@ class TFModelTesterMixin:
             onnxruntime.InferenceSession(onnx_model.SerializeToString())

-    @slow
+    @tooslow
     def test_mixed_precision(self):
         tf.keras.mixed_precision.experimental.set_policy("mixed_float16")
@@ -488,7 +495,7 @@ class TFModelTesterMixin:
         max_diff = np.amax(np.abs(tfo - pto))
         self.assertLessEqual(max_diff, 4e-2)

-    @slow
+    @tooslow
     def test_train_pipeline_custom_model(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
         # head_mask and decoder_head_mask has different shapes than other input args
@@ -909,7 +916,7 @@ class TFModelTesterMixin:
             model(inputs)

-    @slow
+    @tooslow
     def test_graph_mode_with_inputs_embeds(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import re
import sys

from slack_sdk import WebClient


def handle_test_results(test_results):
    expressions = test_results.split(" ")

    failed = 0
    success = 0

    # When the output is short enough, the output is surrounded by = signs: "== OUTPUT =="
    # When it is too long, those signs are not present.
    time_spent = expressions[-2] if "=" in expressions[-1] else expressions[-1]

    for i, expression in enumerate(expressions):
        if "failed" in expression:
            failed += int(expressions[i - 1])
        if "passed" in expression:
            success += int(expressions[i - 1])

    return failed, success, time_spent


def format_for_slack(total_results, results, scheduled: bool):
    print(results)
    header = {
        "type": "header",
        "text": {
            "type": "plain_text",
            "text": "🤗 Results of the scheduled tests, March 11, 2021." if scheduled else "🤗 Self-push results",
            "emoji": True,
        },
    }

    total = (
        {
            "type": "section",
            "fields": [
                {"type": "mrkdwn", "text": f"*Failures:*\n{total_results['failed']} failures."},
                {"type": "mrkdwn", "text": f"*Passed:*\n{total_results['success']} tests passed."},
            ],
        }
        if total_results["failed"] > 0
        else {
            "type": "section",
            "fields": [{"type": "mrkdwn", "text": f"*Congrats!*\nAll {total_results['success']} tests pass."}],
        }
    )

    blocks = [header, total]

    if total_results["failed"] > 0:
        for key, result in results.items():
            print(key, result)
            blocks.append({"type": "header", "text": {"type": "plain_text", "text": key, "emoji": True}})
            blocks.append(
                {
                    "type": "section",
                    "fields": [
                        {
                            "type": "mrkdwn",
                            "text": f"*Results:*\n{result['failed']} failed, {result['success']} passed.",
                        },
                        {"type": "mrkdwn", "text": f"*Time spent:*\n{result['time_spent']}"},
                    ],
                }
            )
    else:
        for key, result in results.items():
            blocks.append(
                {"type": "section", "fields": [{"type": "mrkdwn", "text": f"*{key}*\n{result['time_spent']}."}]}
            )

    footer = {
        "type": "section",
        "text": {
            "type": "mrkdwn",
            "text": "<https://github.com/huggingface/transformers/actions/workflows/self-scheduled.yml|View on GitHub>"
            if scheduled
            else "<https://github.com/huggingface/transformers/actions/workflows/self-push.yml|View on GitHub>",
        },
    }

    blocks.append(footer)

    blocks = {"blocks": blocks}

    return blocks


if __name__ == "__main__":
    scheduled = sys.argv[1] == "scheduled"

    if scheduled:
        # The scheduled run has several artifacts for each job.
        file_paths = {
            "TF Single GPU": {
                "common": "run_all_tests_tf_gpu_test_reports/tests_tf_gpu_[].txt",
                "pipeline": "run_all_tests_tf_gpu_test_reports/tests_tf_pipeline_gpu_[].txt",
            },
            "Torch Single GPU": {
                "common": "run_all_tests_torch_gpu_test_reports/tests_torch_gpu_[].txt",
                "pipeline": "run_all_tests_torch_gpu_test_reports/tests_torch_pipeline_gpu_[].txt",
                "examples": "run_all_tests_torch_gpu_test_reports/examples_torch_gpu_[].txt",
            },
            "TF Multi GPU": {
                "common": "run_all_tests_tf_multi_gpu_test_reports/tests_tf_multi_gpu_[].txt",
                "pipeline": "run_all_tests_tf_multi_gpu_test_reports/tests_tf_pipeline_multi_gpu_[].txt",
            },
            "Torch Multi GPU": {
                "common": "run_all_tests_torch_multi_gpu_test_reports/tests_torch_multi_gpu_[].txt",
                "pipeline": "run_all_tests_torch_multi_gpu_test_reports/tests_torch_pipeline_multi_gpu_[].txt",
            },
        }
    else:
        file_paths = {
            "TF Single GPU": {"common": "run_all_tests_tf_gpu_test_reports/tests_tf_gpu_[].txt"},
            "Torch Single GPU": {"common": "run_all_tests_torch_gpu_test_reports/tests_torch_gpu_[].txt"},
            "TF Multi GPU": {"common": "run_all_tests_tf_multi_gpu_test_reports/tests_tf_multi_gpu_[].txt"},
            "Torch Multi GPU": {"common": "run_all_tests_torch_multi_gpu_test_reports/tests_torch_multi_gpu_[].txt"},
        }

    client = WebClient(token=os.environ["CI_SLACK_BOT_TOKEN"])
    channel_id = os.environ["CI_SLACK_CHANNEL_ID"]

    try:
        results = {}
        for job, file_dict in file_paths.items():

            # Single return value for failed/success across steps of a same job
            results[job] = {"failed": 0, "success": 0, "time_spent": "", "failures": ""}

            for key, file_path in file_dict.items():
                with open(file_path.replace("[]", "stats")) as f:
                    failed, success, time_spent = handle_test_results(f.read())
                    results[job]["failed"] += failed
                    results[job]["success"] += success
                    results[job]["time_spent"] += time_spent[1:-1] + ", "

                with open(file_path.replace("[]", "summary_short")) as f:
                    for line in f:
                        if re.search("FAILED", line):
                            results[job]["failures"] += line

            # Remove the trailing ", "
            results[job]["time_spent"] = results[job]["time_spent"][:-2]

        test_results_keys = ["failed", "success"]
        total = {"failed": 0, "success": 0}
        for job, job_result in results.items():
            for result_key in test_results_keys:
                total[result_key] += job_result[result_key]

        to_be_sent_to_slack = format_for_slack(total, results, scheduled)

        result = client.chat_postMessage(
            channel=channel_id,
            blocks=to_be_sent_to_slack["blocks"],
        )

        for job, job_result in results.items():
            if len(job_result["failures"]):
                client.chat_postMessage(
                    channel=channel_id, text=f"{job}\n{job_result['failures']}", thread_ts=result["ts"]
                )

    except Exception as e:
        # Voluntarily catch every exception and send it to Slack.
        raise Exception(f"Setup error: no artifacts were found. Error: {e}") from e