Enable Past CI (#17919)

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>

Enable Past CI (#17919)
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
f6814372 · Yih-Dar · GitHub · 5ae087cf · f6814372 · f6814372
Unverified Commit f6814372 authored Jul 05, 2022 by Yih-Dar Committed by GitHub Jul 05, 2022
5 changed files
--- a/.github/workflows/build-past-ci-docker-images.yml
+++ b/.github/workflows/build-past-ci-docker-images.yml
+# Builds the docker images used by the Past CI.
+name: "Build docker images (Past CI)"
+on:
+  push:
+    # Only pushes to branches whose name starts with `past-ci-docker-image` trigger a build.
+    branches: ["past-ci-docker-image*"]
+concurrency:
+  # An in-progress run of this group is not cancelled when a new run is queued.
+  cancel-in-progress: false
+  group: "docker-images-builds"
+jobs:
+  # Builds one image per past PyTorch release listed in the matrix and pushes it with a per-version tag.
+  past-pytorch-docker:
+    name: "Past PyTorch Docker"
+    runs-on: ubuntu-latest
+    strategy:
+      # A failing version does not cancel the builds of the other versions.
+      fail-fast: false
+      matrix:
+        version:
+          - "1.10"
+          - "1.9"
+          - "1.8"
+          - "1.7"
+          - "1.6"
+          - "1.5"
+          - "1.4"
+    steps:
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v1
+      - name: Check out code
+        uses: actions/checkout@v2
+      - name: Login to DockerHub
+        uses: docker/login-action@v1
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      - name: Build and push
+        uses: docker/build-push-action@v2
+        with:
+          context: ./docker/transformers-past-gpu
+          push: true
+          tags: huggingface/transformers-pytorch-past-${{ matrix.version }}-gpu
+          # Forwarded to the `ARG` instructions of the Dockerfile in `context`.
+          build-args: |
+            REF=main
+            FRAMEWORK=pytorch
+            VERSION=${{ matrix.version }}
+  # Builds one image per past TensorFlow release listed in the matrix and pushes it with a per-version tag.
+  past-tensorflow-docker:
+    name: "Past TensorFlow Docker"
+    runs-on: ubuntu-latest
+    strategy:
+      # A failing version does not cancel the builds of the other versions.
+      fail-fast: false
+      matrix:
+        version:
+          - "2.8"
+          - "2.7"
+          - "2.6"
+          - "2.5"
+    steps:
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v1
+      - name: Check out code
+        uses: actions/checkout@v2
+      - name: Login to DockerHub
+        uses: docker/login-action@v1
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      - name: Build and push
+        uses: docker/build-push-action@v2
+        with:
+          context: ./docker/transformers-past-gpu
+          push: true
+          tags: huggingface/transformers-tensorflow-past-${{ matrix.version }}-gpu
+          # Forwarded to the `ARG` instructions of the Dockerfile in `context`.
+          build-args: |
+            REF=main
+            FRAMEWORK=tensorflow
+            VERSION=${{ matrix.version }}
+  # Same build as the other TensorFlow job, but for 2.4 only and with an explicit `BASE_DOCKER_IMAGE` build
+  # argument (a CUDA 11.0.3 image) instead of the Dockerfile's default base image.
+  past-tensorflow-docker-2-4:
+    name: "Past TensorFlow Docker"
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        version:
+          - "2.4"
+    steps:
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v1
+      - name: Check out code
+        uses: actions/checkout@v2
+      - name: Login to DockerHub
+        uses: docker/login-action@v1
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      - name: Build and push
+        uses: docker/build-push-action@v2
+        with:
+          context: ./docker/transformers-past-gpu
+          push: true
+          tags: huggingface/transformers-tensorflow-past-${{ matrix.version }}-gpu
+          # Forwarded to the `ARG` instructions of the Dockerfile in `context`.
+          build-args: |
+            REF=main
+            BASE_DOCKER_IMAGE=nvidia/cuda:11.0.3-cudnn8-devel-ubuntu20.04
+            FRAMEWORK=tensorflow
+            VERSION=${{ matrix.version }}
\ No newline at end of file
--- a/.github/workflows/self-past-caller.yml
+++ b/.github/workflows/self-past-caller.yml
+name: Self-hosted runner (past-ci-caller)
+# Calls the reusable workflow `self-past.yml` once per past framework release.
+#
+# The jobs are chained through `needs`, so the releases are tested one after another.
+# Each job uses `if: ${{ !cancelled() }}`: it still runs when the previous release failed, but - unlike
+# `always()` - the rest of the chain is skipped once the workflow run is cancelled.
+on:
+  push:
+    branches:
+      - run-past-ci*
+jobs:
+  run_past_ci_pytorch_1-10:
+    name: PyTorch 1.10
+    if: ${{ !cancelled() }}
+    uses: ./.github/workflows/self-past.yml
+    with:
+      framework: pytorch
+      version: "1.10"
+    secrets: inherit
+  run_past_ci_pytorch_1-9:
+    name: PyTorch 1.9
+    if: ${{ !cancelled() }}
+    needs: [run_past_ci_pytorch_1-10]
+    uses: ./.github/workflows/self-past.yml
+    with:
+      framework: pytorch
+      version: "1.9"
+    secrets: inherit
+  run_past_ci_pytorch_1-8:
+    name: PyTorch 1.8
+    if: ${{ !cancelled() }}
+    needs: [run_past_ci_pytorch_1-9]
+    uses: ./.github/workflows/self-past.yml
+    with:
+      framework: pytorch
+      version: "1.8"
+    secrets: inherit
+  run_past_ci_pytorch_1-7:
+    name: PyTorch 1.7
+    if: ${{ !cancelled() }}
+    needs: [run_past_ci_pytorch_1-8]
+    uses: ./.github/workflows/self-past.yml
+    with:
+      framework: pytorch
+      version: "1.7"
+    secrets: inherit
+  run_past_ci_pytorch_1-6:
+    name: PyTorch 1.6
+    if: ${{ !cancelled() }}
+    needs: [run_past_ci_pytorch_1-7]
+    uses: ./.github/workflows/self-past.yml
+    with:
+      framework: pytorch
+      version: "1.6"
+    secrets: inherit
+  run_past_ci_pytorch_1-5:
+    name: PyTorch 1.5
+    if: ${{ !cancelled() }}
+    needs: [run_past_ci_pytorch_1-6]
+    uses: ./.github/workflows/self-past.yml
+    with:
+      framework: pytorch
+      version: "1.5"
+    secrets: inherit
+  run_past_ci_pytorch_1-4:
+    name: PyTorch 1.4
+    if: ${{ !cancelled() }}
+    needs: [run_past_ci_pytorch_1-5]
+    uses: ./.github/workflows/self-past.yml
+    with:
+      framework: pytorch
+      version: "1.4"
+    secrets: inherit
+  run_past_ci_tensorflow_2-8:
+    name: TensorFlow 2.8
+    if: ${{ !cancelled() }}
+    needs: [run_past_ci_pytorch_1-4]
+    uses: ./.github/workflows/self-past.yml
+    with:
+      framework: tensorflow
+      version: "2.8"
+    secrets: inherit
+  run_past_ci_tensorflow_2-7:
+    name: TensorFlow 2.7
+    if: ${{ !cancelled() }}
+    needs: [run_past_ci_tensorflow_2-8]
+    uses: ./.github/workflows/self-past.yml
+    with:
+      framework: tensorflow
+      version: "2.7"
+    secrets: inherit
+  run_past_ci_tensorflow_2-6:
+    name: TensorFlow 2.6
+    if: ${{ !cancelled() }}
+    needs: [run_past_ci_tensorflow_2-7]
+    uses: ./.github/workflows/self-past.yml
+    with:
+      framework: tensorflow
+      version: "2.6"
+    secrets: inherit
+  run_past_ci_tensorflow_2-5:
+    name: TensorFlow 2.5
+    if: ${{ !cancelled() }}
+    needs: [run_past_ci_tensorflow_2-6]
+    uses: ./.github/workflows/self-past.yml
+    with:
+      framework: tensorflow
+      version: "2.5"
+    secrets: inherit
+  run_past_ci_tensorflow_2-4:
+    name: TensorFlow 2.4
+    if: ${{ !cancelled() }}
+    needs: [run_past_ci_tensorflow_2-5]
+    uses: ./.github/workflows/self-past.yml
+    with:
+      framework: tensorflow
+      version: "2.4"
+    secrets: inherit
\ No newline at end of file
--- a/.github/workflows/self-past.yml
+++ b/.github/workflows/self-past.yml
+name: "Self-hosted runner (past)"
+# Note that each job's dependencies go into a corresponding docker file.
+#
+# The test jobs of this workflow run in the image
+# `huggingface/transformers-<framework>-past-<version>-gpu`. Images with these tags are built by
+# `.github/workflows/build-past-ci-docker-images.yml` from `docker/transformers-past-gpu/Dockerfile`.
+on:
+  # Reusable workflow: the caller chooses the framework and the release to test.
+  workflow_call:
+    inputs:
+      framework:
+        type: string
+        required: true
+      version:
+        type: string
+        required: true
+env:
+  HF_HOME: /mnt/cache
+  TRANSFORMERS_IS_CI: "yes"
+  RUN_SLOW: "yes"
+  RUN_PT_TF_CROSS_TESTS: 1
+  OMP_NUM_THREADS: 8
+  MKL_NUM_THREADS: 8
+  TF_FORCE_GPU_ALLOW_GROWTH: "true"
+  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
+jobs:
+  # Computes the list of test folders that the test jobs use as their `folders` matrix.
+  setup:
+    name: Setup
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    steps:
+      - name: Checkout transformers
+        uses: actions/checkout@v2
+        with:
+          fetch-depth: 2
+      - name: Cleanup
+        run: |
+          rm -rf tests/__pycache__
+          rm -rf tests/models/__pycache__
+          rm -rf reports
+      - id: set-matrix
+        name: Identify models to test
+        # The Python one-liner prints a list with every `models/<name>` directory first, followed by the other
+        # directories directly under `tests/` (`models` itself is removed).
+        # The step output is written to `$GITHUB_OUTPUT`, as the `::set-output` workflow command is deprecated.
+        run: |
+          cd tests
+          echo "matrix=$(python3 -c 'import os; tests = os.getcwd(); model_tests = os.listdir(os.path.join(tests, "models")); d1 = sorted(list(filter(os.path.isdir, os.listdir(tests)))); d2 = sorted(list(filter(os.path.isdir, [f"models/{x}" for x in model_tests]))); d1.remove("models"); d = d2 + d1; print(d)')" >> "$GITHUB_OUTPUT"
+  # Runs every test folder from `setup` on a `single-gpu-docker-past-ci` runner, inside the image of the
+  # framework release given by the workflow inputs.
+  run_tests_single_gpu:
+    name: Model tests
+    needs: setup
+    runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker-past-ci') }}
+    strategy:
+      fail-fast: false
+      matrix:
+        folders: ${{ fromJson(needs.setup.outputs.matrix) }}
+        machine_type:
+          - single-gpu
+    container:
+      image: huggingface/transformers-${{ inputs.framework }}-past-${{ inputs.version }}-gpu
+      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    steps:
+      # The image already contains a clone in `/transformers`; move it to the commit under test.
+      - name: Update clone
+        working-directory: /transformers
+        run: git fetch && git checkout ${{ github.sha }}
+      # The character `/` is not allowed in artifact names. For folders like `models/bert`, export the env. var.
+      # `matrix_folders` as `models_bert` so that the upload step can use it in the artifact name.
+      - name: Echo folder ${{ matrix.folders }}
+        shell: bash
+        run: |
+          echo "${{ matrix.folders }}"
+          matrix_folders=${{ matrix.folders }}
+          matrix_folders=${matrix_folders/'models/'/'models_'}
+          echo "$matrix_folders"
+          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
+      - name: NVIDIA-SMI
+        run: nvidia-smi
+      - name: Environment
+        working-directory: /transformers
+        run: python3 utils/print_env.py
+      - name: Run all tests on GPU
+        working-directory: /transformers
+        run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
+      # Best effort: a missing report file must not add another failure.
+      - name: Failure short reports
+        if: failure()
+        continue-on-error: true
+        run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
+      # Uploaded whether the tests passed or not.
+      - name: Test suite reports artifacts
+        if: always()
+        uses: actions/upload-artifact@v2
+        with:
+          name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
+          path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
+  # Runs every test folder from `setup` on a `multi-gpu-docker-past-ci` runner (container started with
+  # `--gpus all`), inside the image of the framework release given by the workflow inputs.
+  run_tests_multi_gpu:
+    name: Model tests
+    needs: setup
+    runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker-past-ci') }}
+    strategy:
+      fail-fast: false
+      matrix:
+        folders: ${{ fromJson(needs.setup.outputs.matrix) }}
+        machine_type:
+          - multi-gpu
+    container:
+      image: huggingface/transformers-${{ inputs.framework }}-past-${{ inputs.version }}-gpu
+      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    steps:
+      # The image already contains a clone in `/transformers`; move it to the commit under test.
+      - name: Update clone
+        working-directory: /transformers
+        run: git fetch && git checkout ${{ github.sha }}
+      # The character `/` is not allowed in artifact names. For folders like `models/bert`, export the env. var.
+      # `matrix_folders` as `models_bert` so that the upload step can use it in the artifact name.
+      - name: Echo folder ${{ matrix.folders }}
+        shell: bash
+        run: |
+          echo "${{ matrix.folders }}"
+          matrix_folders=${{ matrix.folders }}
+          matrix_folders=${matrix_folders/'models/'/'models_'}
+          echo "$matrix_folders"
+          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
+      - name: NVIDIA-SMI
+        run: nvidia-smi
+      - name: Environment
+        working-directory: /transformers
+        run: python3 utils/print_env.py
+      - name: Run all tests on GPU
+        working-directory: /transformers
+        run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
+      # Best effort: a missing report file must not add another failure.
+      - name: Failure short reports
+        if: failure()
+        continue-on-error: true
+        run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
+      # Uploaded whether the tests passed or not.
+      - name: Test suite reports artifacts
+        if: always()
+        uses: actions/upload-artifact@v2
+        with:
+          name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
+          path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
+  # Downloads the artifacts of the run and calls `utils/notification_service.py` with the Slack settings in
+  # its environment. `always()` makes the job run whatever the outcome of the jobs it needs.
+  send_results:
+    name: Send results to webhook
+    needs: [setup, run_tests_single_gpu, run_tests_multi_gpu]
+    if: ${{ always() }}
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/download-artifact@v2
+      - name: Send message to Slack
+        # We pass `needs.setup.outputs.matrix` as the argument. `notification_service.py` has to change
+        # `models/bert` to `models_bert`, as the artifact names use `_` instead of `/`.
+        run: |
+          pip install slack_sdk
+          python utils/notification_service.py "${{ needs.setup.outputs.matrix }}"
+        env:
+          CI_EVENT: Past CI - ${{ inputs.framework }}-${{ inputs.version }}
+          CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
+          CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
+          CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
+          CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
+          CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_PAST_FUTURE }}
\ No newline at end of file
--- a/docker/transformers-past-gpu/Dockerfile
+++ b/docker/transformers-past-gpu/Dockerfile
+# Image for the Past CI. It contains `transformers` checked out at `REF` plus one past release of one framework.
+# `BASE_DOCKER_IMAGE` is a build argument, so a build can choose another CUDA base image than the default below.
+ARG BASE_DOCKER_IMAGE="nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04"
+FROM $BASE_DOCKER_IMAGE
+LABEL maintainer="Hugging Face"
+ARG DEBIAN_FRONTEND=noninteractive
+# Use login shell to read variables from `~/.profile` (to pass dynamically created variables between RUN commands)
+SHELL ["sh", "-lc"]
+# Refresh the package index and install in the same layer. With a separate `RUN apt update`, a cached layer holding
+# a stale index could be reused for the install.
+RUN apt update && apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-pip ffmpeg git-lfs
+RUN git lfs install
+RUN python3 -m pip install --no-cache-dir --upgrade pip
+# Branch, tag or commit of `transformers` to check out.
+ARG REF=main
+RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF
+RUN python3 -m pip install --no-cache-dir -e ./transformers[dev,onnxruntime]
+# When installing in editable mode, `transformers` is not recognized as a package.
+# this line must be added in order for python to be aware of transformers.
+RUN cd transformers && python3 setup.py develop
+# Framework and release to install; both are passed to `utils/past_ci_versions.py` below.
+ARG FRAMEWORK
+ARG VERSION
+# Remove all frameworks
+# (`accelerate` requires `torch`, and this causes import issues for TF-only testing)
+RUN python3 -m pip uninstall -y torch torchvision torchaudio accelerate tensorflow jax flax
+# Get the libraries and their versions to install, and write installation command to `~/.profile`.
+RUN python3 ./transformers/utils/past_ci_versions.py --framework $FRAMEWORK --version $VERSION
+# Install the target framework
+# (`INSTALL_CMD` is available here because each RUN uses a login shell, which reads `~/.profile`)
+RUN echo "INSTALL_CMD = $INSTALL_CMD"
+RUN $INSTALL_CMD
+# Having installation problems for torch-scatter with torch <= 1.6. Disable so we have the same set of tests.
+# (This part will be removed once the logic of using `past_ci_versions.py` is used in other Dockerfile files.)
+# # Use installed torch version for `torch-scatter`.
+# # (The env. variable $CUDA is defined in `past_ci_versions.py`)
+# RUN [ "$FRAMEWORK" = "pytorch" ] && python3 -m pip install --no-cache-dir torch-scatter -f https://data.pyg.org/whl/torch-$(python3 -c "from torch import version; print(version.__version__.split('+')[0])")+$CUDA.html || echo "torch-scatter not to be installed"
+RUN python3 -m pip install -U "itsdangerous<2.1.0"
--- a/utils/past_ci_versions.py
+++ b/utils/past_ci_versions.py
+import argparse
+import os
+# Installation data for the Past CI, indexed as `past_versions_testing[framework][version]`.
+# `framework` is `"pytorch"` or `"tensorflow"`; `version` is a "major.minor" string such as `"1.10"`.
+# In each entry, `install` is the full pip command for that release. The `__main__` block of this file reads
+# `install` and, for pytorch entries, `cuda`; the other keys are not read in this file.
+past_versions_testing = {
+    "pytorch": {
+        "1.10": {
+            "torch": "1.10.2",
+            "torchvision": "0.11.3",
+            "torchaudio": "0.10.2",
+            "python": 3.9,
+            "cuda": "cu113",
+            "install": (
+                "python3 -m pip install --no-cache-dir -U torch==1.10.2 torchvision==0.11.3 torchaudio==0.10.2"
+                " --extra-index-url https://download.pytorch.org/whl/cu113"
+            ),
+        },
+        # torchaudio < 0.10 has no CUDA-enabled binary distributions
+        "1.9": {
+            "torch": "1.9.1",
+            "torchvision": "0.10.1",
+            "torchaudio": "0.9.1",
+            "python": 3.9,
+            "cuda": "cu111",
+            "install": (
+                "python3 -m pip install --no-cache-dir -U torch==1.9.1 torchvision==0.10.1 torchaudio==0.9.1"
+                " --extra-index-url https://download.pytorch.org/whl/cu111"
+            ),
+        },
+        "1.8": {
+            "torch": "1.8.1",
+            "torchvision": "0.9.1",
+            "torchaudio": "0.8.1",
+            "python": 3.9,
+            "cuda": "cu111",
+            "install": (
+                "python3 -m pip install --no-cache-dir -U torch==1.8.1 torchvision==0.9.1 torchaudio==0.8.1"
+                " --extra-index-url https://download.pytorch.org/whl/cu111"
+            ),
+        },
+        "1.7": {
+            "torch": "1.7.1",
+            "torchvision": "0.8.2",
+            "torchaudio": "0.7.2",
+            "python": 3.9,
+            "cuda": "cu110",
+            "install": (
+                "python3 -m pip install --no-cache-dir -U torch==1.7.1 torchvision==0.8.2 torchaudio==0.7.2"
+                " --extra-index-url https://download.pytorch.org/whl/cu110"
+            ),
+        },
+        "1.6": {
+            "torch": "1.6.0",
+            "torchvision": "0.7.0",
+            "torchaudio": "0.6.0",
+            "python": 3.8,
+            "cuda": "cu101",
+            "install": (
+                "python3 -m pip install --no-cache-dir -U torch==1.6.0 torchvision==0.7.0 torchaudio==0.6.0"
+                " --extra-index-url https://download.pytorch.org/whl/cu101"
+            ),
+        },
+        "1.5": {
+            "torch": "1.5.1",
+            "torchvision": "0.6.1",
+            "torchaudio": "0.5.1",
+            "python": 3.8,
+            "cuda": "cu101",
+            "install": (
+                "python3 -m pip install --no-cache-dir -U torch==1.5.1 torchvision==0.6.1 torchaudio==0.5.1"
+                " --extra-index-url https://download.pytorch.org/whl/cu101"
+            ),
+        },
+        "1.4": {
+            "torch": "1.4.0",
+            "torchvision": "0.5.0",
+            "torchaudio": "0.4.0",
+            "python": 3.8,
+            "cuda": "cu100",
+            "install": (
+                "python3 -m pip install --no-cache-dir -U torch==1.4.0 torchvision==0.5.0 torchaudio==0.4.0"
+                " --extra-index-url https://download.pytorch.org/whl/cu100"
+            ),
+        },
+    },
+    "tensorflow": {
+        "2.8": {
+            "tensorflow": "2.8.2",
+            "install": "python3 -m pip install --no-cache-dir -U tensorflow==2.8.2",
+        },
+        "2.7": {
+            "tensorflow": "2.7.3",
+            "install": "python3 -m pip install --no-cache-dir -U tensorflow==2.7.3",
+        },
+        "2.6": {
+            "tensorflow": "2.6.5",
+            "install": "python3 -m pip install --no-cache-dir -U tensorflow==2.6.5",
+        },
+        "2.5": {
+            "tensorflow": "2.5.3",
+            "install": "python3 -m pip install --no-cache-dir -U tensorflow==2.5.3",
+        },
+        # need another `nvidia:cuda` docker image, otherwise GPU not working
+        "2.4": {
+            "tensorflow": "2.4.4",
+            "install": "python3 -m pip install --no-cache-dir -U tensorflow==2.4.4",
+            # This should be specified as a docker build argument.
+            # We keep the information here for reference only.
+            "base_docker": "nvidia/cuda:11.0.3-cudnn8-devel-ubuntu20.04",
+        },
+    },
+}
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser("Choose the framework and version to install")
+    parser.add_argument("--framework", help="The framework to install. Should be `torch` or `tensorflow`", type=str)
+    parser.add_argument("--version", help="The version of the framework to install.", type=str)
+    args = parser.parse_args()
+    info = past_versions_testing[args.framework][args.version]
+    os.system(f'echo "export INSTALL_CMD=\'{info["install"]}\'" >> ~/.profile')
+    print(f'echo "export INSTALL_CMD=\'{info["install"]}\'" >> ~/.profile')
+    cuda = ""
+    if args.framework == "pytorch":
+        cuda = info["cuda"]
+    os.system(f"echo \"export CUDA='{cuda}'\" >> ~/.profile")
+    print(f"echo \"export CUDA='{cuda}'\" >> ~/.profile")