Unverified commit 642e1936, authored by Patrick von Platen, committed by GitHub

[GitHub Runner] Fix flax runner (#13357)

* correct

* also comment out the multi-GPU test job in the push workflow

Parent: c76de105
```diff
@@ -106,9 +106,9 @@ jobs:
         python -c "from jax.lib import xla_bridge; print('GPU available:', xla_bridge.get_backend().platform)"
         python -c "import jax; print('Number of GPUs available:', len(jax.local_devices()))"
 
-    # - name: Fetch the tests to run
-    #   run: |
-    #     python utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt
+    - name: Fetch the tests to run
+      run: |
+        python utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt
 
     - name: Report fetched tests
       uses: actions/upload-artifact@v2
```
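A note on the re-enabled fetch step: `tee` duplicates the fetcher's output, so the selected tests appear in the job log and also land in `test_preparation.txt`, which the artifact step that follows uploads. An annotated sketch of that pair of steps (the YAML is as in the hunk above; the comments are editorial, and the fetcher's exact output format is assumed rather than verified):

```yaml
# Sketch of the fetch-and-report pair re-enabled above.
- name: Fetch the tests to run
  run: |
    # tee writes the impacted-test list to the log AND to a file on disk.
    python utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt

# Upload the list so it can be inspected after the run, even on failure.
- name: Report fetched tests
  uses: actions/upload-artifact@v2
  with:
    name: test_fetched
    path: test_preparation.txt
```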
```diff
@@ -118,10 +118,9 @@ jobs:
 
     - name: Run all non-slow tests on GPU
       run: |
-        python -m pytest -n 2 --dist=loadfile -v --make-reports=tests_flax_gpu
-        # if [ -f test_list.txt ]; then
-        #   python -m pytest -n 2 --dist=loadfile -v --make-reports=tests_flax_gpu $(cat test_list.txt)
-        # fi
+        if [ -f test_list.txt ]; then
+          python -m pytest -n 2 --dist=loadfile -v --make-reports=tests_flax_gpu $(cat test_list.txt)
+        fi
 
     - name: Failure short reports
       if: ${{ failure() }}
```
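This hunk is the substantive fix: the old step always launched pytest, with no test list at all, while the new one runs pytest only when the fetcher actually produced `test_list.txt`, so the step is a no-op when there is nothing to run instead of an unconditional pytest invocation. The guarded step again, annotated (file and flag names are taken from the workflow; `--make-reports` comes from the repository's own test configuration, not stock pytest):

```yaml
# Annotated sketch of the guarded test step introduced above.
- name: Run all non-slow tests on GPU
  run: |
    # Only run when the fetcher wrote a list of impacted tests.
    if [ -f test_list.txt ]; then
      # -n 2 --dist=loadfile: two pytest-xdist workers, with tests grouped
      # by file so one module never splits across workers.
      python -m pytest -n 2 --dist=loadfile -v --make-reports=tests_flax_gpu $(cat test_list.txt)
    fi
```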
```diff
@@ -251,61 +250,60 @@ jobs:
           name: run_all_tests_torch_multi_gpu_test_reports
           path: reports
 
-  run_tests_flax_multi_gpu:
-    runs-on: [self-hosted, docker-gpu, multi-gpu]
-    container:
-      image: tensorflow/tensorflow:2.4.1-gpu
-      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    steps:
-    - name: Install dependencies
-      run: |
-        apt -y update && apt install -y software-properties-common && apt -y update && add-apt-repository -y ppa:git-core/ppa && apt -y update && apt install -y git
-        pip install --upgrade "jax[cuda111]" -f https://storage.googleapis.com/jax-releases/jax_releases.html
-        pip install --upgrade pip
-        pip install .[sklearn,testing,sentencepiece,flax,flax-speech,vision]
-
-    - name: Launcher docker
-      uses: actions/checkout@v2
-      with:
-        fetch-depth: 2
-
-    - name: NVIDIA-SMI
-      continue-on-error: true
-      run: |
-        nvidia-smi
-
-    - name: Are GPUs recognized by our DL frameworks
-      run: |
-        python -c "from jax.lib import xla_bridge; print('GPU available:', xla_bridge.get_backend().platform)"
-        python -c "import jax; print('Number of GPUs available:', len(jax.local_devices()))"
-
+#  run_tests_flax_multi_gpu:
+#    runs-on: [self-hosted, docker-gpu, multi-gpu]
+#    container:
+#      image: tensorflow/tensorflow:2.4.1-gpu
+#      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+#    steps:
+#    - name: Install dependencies
+#      run: |
+#        apt -y update && apt install -y software-properties-common && apt -y update && add-apt-repository -y ppa:git-core/ppa && apt -y update && apt install -y git
+#        pip install --upgrade "jax[cuda111]" -f https://storage.googleapis.com/jax-releases/jax_releases.html
+#        pip install --upgrade pip
+#        pip install .[sklearn,testing,sentencepiece,flax,flax-speech,vision]
+#
+#    - name: Launcher docker
+#      uses: actions/checkout@v2
+#      with:
+#        fetch-depth: 2
+#
+#    - name: NVIDIA-SMI
+#      continue-on-error: true
+#      run: |
+#        nvidia-smi
+#
+#    - name: Are GPUs recognized by our DL frameworks
+#      run: |
+#        python -c "from jax.lib import xla_bridge; print('GPU available:', xla_bridge.get_backend().platform)"
+#        python -c "import jax; print('Number of GPUs available:', len(jax.local_devices()))"
+#
     # - name: Fetch the tests to run
     #   run: |
     #     python utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt
-
-    - name: Report fetched tests
-      uses: actions/upload-artifact@v2
-      with:
-        name: test_fetched
-        path: test_preparation.txt
-
-    - name: Run all non-slow tests on GPU
-      run: |
-        python -m pytest -n 2 --dist=loadfile -v --make-reports=tests_flax_multi_gpu
+#
+#    - name: Report fetched tests
+#      uses: actions/upload-artifact@v2
+#      with:
+#        name: test_fetched
+#        path: test_preparation.txt
+#
+#    - name: Run all non-slow tests on GPU
+#      run: |
         # if [ -f test_list.txt ]; then
         #   python -m pytest -n 2 --dist=loadfile -v --make-reports=tests_flax_multi_gpu $(cat test_list.txt)
         # fi
-
-    - name: Failure short reports
-      if: ${{ failure() }}
-      run: cat reports/tests_flax_multi_gpu_failures_short.txt
-
-    - name: Test suite reports artifacts
-      if: ${{ always() }}
-      uses: actions/upload-artifact@v2
-      with:
-        name: run_all_tests_flax_multi_gpu_test_reports
-        path: reports
+#
+#    - name: Failure short reports
+#      if: ${{ failure() }}
+#      run: cat reports/tests_flax_multi_gpu_failures_short.txt
+#
+#    - name: Test suite reports artifacts
+#      if: ${{ always() }}
+#      uses: actions/upload-artifact@v2
+#      with:
+#        name: run_all_tests_flax_multi_gpu_test_reports
+#        path: reports
 
 # run_tests_tf_multi_gpu:
 #   runs-on: [self-hosted, docker-gpu, multi-gpu]
```
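Both jobs use the same two one-liners to confirm that JAX can actually see the GPU before any tests run. The probe, annotated (identical calls to the workflow's; `jax.lib.xla_bridge` is the import path JAX exposed at the time, since deprecated in newer releases):

```yaml
# GPU visibility probe shared by these jobs.
- name: Are GPUs recognized by our DL frameworks
  run: |
    # Prints the default backend platform: "gpu" when CUDA is usable, "cpu" otherwise.
    python -c "from jax.lib import xla_bridge; print('GPU available:', xla_bridge.get_backend().platform)"
    # local_devices() lists every accelerator JAX can address on this host.
    python -c "import jax; print('Number of GPUs available:', len(jax.local_devices()))"
```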
The commit also edits a second workflow file. Its path is not preserved in this capture, but the `run_all_tests_*` job names suggest the scheduled run rather than the push run. Two changes land there:
```diff
@@ -86,7 +86,7 @@ jobs:
           path: reports
 
   run_all_tests_flax_gpu:
-    runs-on: [self-hosted, docker-gpu, single-gpu]
+    runs-on: [self-hosted, docker-gpu-test, single-gpu]
     container:
       image: tensorflow/tensorflow:2.4.1-gpu
       options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
```
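The first change is a one-label edit: `docker-gpu` becomes `docker-gpu-test` in `runs-on`. For self-hosted runners, a job is dispatched only to a machine that carries every listed label, so this pins the single-GPU Flax job to a separately labelled test machine. A hypothetical minimal workflow illustrating the rule (the workflow and job names here are invented for illustration):

```yaml
# Hypothetical workflow, not part of the commit: shows how runs-on labels
# select a self-hosted machine. The runner must carry ALL three labels.
name: flax-gpu-smoke
on: workflow_dispatch
jobs:
  smoke:
    runs-on: [self-hosted, docker-gpu-test, single-gpu]
    steps:
      - name: Check the GPU is visible on the selected runner
        run: nvidia-smi
```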
```diff
@@ -291,45 +291,44 @@ jobs:
           name: run_all_tests_tf_multi_gpu_test_reports
           path: reports
 
-  run_all_tests_flax_multi_gpu:
-    runs-on: [self-hosted, docker-gpu, multi-gpu]
-    container:
-      image: tensorflow/tensorflow:2.4.1-gpu
-      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    steps:
-    - name: Launcher docker
-      uses: actions/checkout@v2
-
-    - name: NVIDIA-SMI
-      continue-on-error: true
-      run: |
-        nvidia-smi
-
-    - name: Install dependencies
-      run: |
-        pip install --upgrade pip
-        pip install --upgrade "jax[cuda111]" -f https://storage.googleapis.com/jax-releases/jax_releases.html
-        pip install .[flax,integrations,sklearn,testing,sentencepiece,flax-speech,vision]
-
-    - name: Are GPUs recognized by our DL frameworks
-      run: |
-        python -c "from jax.lib import xla_bridge; print('GPU available:', xla_bridge.get_backend().platform)"
-        python -c "import jax; print('Number of GPUs available:', len(jax.local_devices()))"
-
-    - name: Run all tests on GPU
-      run: |
-        python -m pytest -n 1 -v --dist=loadfile --make-reports=tests_flax_gpu tests
-
-    - name: Failure short reports
-      if: ${{ always() }}
-      run: cat reports/tests_flax_gpu_failures_short.txt
-
-    - name: Test suite reports artifacts
-      if: ${{ always() }}
-      uses: actions/upload-artifact@v2
-      with:
-        name: run_all_tests_flax_gpu_test_reports
-        path: reports
+#  run_all_tests_flax_multi_gpu:
+#    runs-on: [self-hosted, docker-gpu, multi-gpu]
+#    container:
+#      image: tensorflow/tensorflow:2.4.1-gpu
+#      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+#    steps:
+#    - name: Launcher docker
+#      uses: actions/checkout@v2
+#
+#    - name: NVIDIA-SMI
+#      run: |
+#        nvidia-smi
+#
+#    - name: Install dependencies
+#      run: |
+#        pip install --upgrade pip
+#        pip install --upgrade "jax[cuda111]" -f https://storage.googleapis.com/jax-releases/jax_releases.html
+#        pip install .[flax,integrations,sklearn,testing,sentencepiece,flax-speech,vision]
+#
+#    - name: Are GPUs recognized by our DL frameworks
+#      run: |
+#        python -c "from jax.lib import xla_bridge; print('GPU available:', xla_bridge.get_backend().platform)"
+#        python -c "import jax; print('Number of GPUs available:', len(jax.local_devices()))"
+#
+#    - name: Run all tests on GPU
+#      run: |
+#        python -m pytest -n 1 -v --dist=loadfile --make-reports=tests_flax_gpu tests
+#
+#    - name: Failure short reports
+#      if: ${{ always() }}
+#      run: cat reports/tests_flax_gpu_failures_short.txt
+#
+#    - name: Test suite reports artifacts
+#      if: ${{ always() }}
+#      uses: actions/upload-artifact@v2
+#      with:
+#        name: run_all_tests_flax_gpu_test_reports
+#        path: reports
 
   run_all_tests_torch_cuda_extensions_gpu:
     runs-on: [self-hosted, docker-gpu, single-gpu]
```
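For reference, the dependency step of the now-disabled multi-GPU job relies on two pip mechanisms worth spelling out: the `cuda111` extra selects a CUDA 11.1 build of jaxlib, and `-f` (short for `--find-links`) adds Google's wheel index to the places pip searches. The same step with editorial comments:

```yaml
# Dependency step from the commented-out job, annotated.
- name: Install dependencies
  run: |
    # Upgrade pip first (older pip versions may not recognize newer wheel tags).
    pip install --upgrade pip
    # "jax[cuda111]" requests the CUDA 11.1 jaxlib build; -f points pip at
    # the extra index where Google hosts those wheels.
    pip install --upgrade "jax[cuda111]" -f https://storage.googleapis.com/jax-releases/jax_releases.html
    # Install the repo with the extras the Flax test suite needs.
    pip install .[flax,integrations,sklearn,testing,sentencepiece,flax-speech,vision]
```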