Unverified commit 659b27fd, authored by Yih-Dar and committed by GitHub
Browse files

Print more library versions in CI (#17384)



* print more lib. versions and just before test runs

* update print_env_pt.py

* rename to print_env

* Disable warning + better job name

* print python version
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent 0932adb3
...@@ -32,9 +32,7 @@ jobs: ...@@ -32,9 +32,7 @@ jobs:
- name: GPU visibility - name: GPU visibility
run: | run: |
utils/print_env_pt.py python3 utils/print_env.py
TF_CPP_MIN_LOG_LEVEL=3 python3 -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
TF_CPP_MIN_LOG_LEVEL=3 python3 -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
- name: Prepare files for doctests - name: Prepare files for doctests
run: | run: |
......
...@@ -41,7 +41,7 @@ jobs: ...@@ -41,7 +41,7 @@ jobs:
- name: Are GPUs recognized by our DL frameworks - name: Are GPUs recognized by our DL frameworks
run: | run: |
utils/print_env_pt.py utils/print_env.py
- name: Run all tests on GPU - name: Run all tests on GPU
run: | run: |
...@@ -109,7 +109,7 @@ jobs: ...@@ -109,7 +109,7 @@ jobs:
- name: Are GPUs recognized by our DL frameworks - name: Are GPUs recognized by our DL frameworks
run: | run: |
utils/print_env_pt.py utils/print_env.py
- name: Run all tests on GPU - name: Run all tests on GPU
env: env:
...@@ -163,7 +163,7 @@ jobs: ...@@ -163,7 +163,7 @@ jobs:
- name: Are GPUs recognized by our DL frameworks - name: Are GPUs recognized by our DL frameworks
run: | run: |
utils/print_env_pt.py utils/print_env.py
- name: Run all tests on GPU - name: Run all tests on GPU
run: | run: |
...@@ -206,7 +206,7 @@ jobs: ...@@ -206,7 +206,7 @@ jobs:
- name: Are GPUs recognized by our DL frameworks - name: Are GPUs recognized by our DL frameworks
run: | run: |
utils/print_env_pt.py utils/print_env.py
- name: Run all tests on GPU - name: Run all tests on GPU
run: | run: |
......
...@@ -87,17 +87,6 @@ jobs: ...@@ -87,17 +87,6 @@ jobs:
image: huggingface/transformers-all-latest-gpu image: huggingface/transformers-all-latest-gpu
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps: steps:
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Are GPUs recognized by our DL frameworks
working-directory: /transformers
run: |
utils/print_env_pt.py
TF_CPP_MIN_LOG_LEVEL=3 python3 -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
TF_CPP_MIN_LOG_LEVEL=3 python3 -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
- name: Echo folder ${{ matrix.folders }} - name: Echo folder ${{ matrix.folders }}
shell: bash shell: bash
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
...@@ -114,6 +103,15 @@ jobs: ...@@ -114,6 +103,15 @@ jobs:
working-directory: /transformers working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }} run: git fetch && git checkout ${{ github.sha }}
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Run all non-slow selected tests on GPU - name: Run all non-slow selected tests on GPU
working-directory: /transformers working-directory: /transformers
run: | run: |
...@@ -146,17 +144,6 @@ jobs: ...@@ -146,17 +144,6 @@ jobs:
image: huggingface/transformers-all-latest-gpu image: huggingface/transformers-all-latest-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps: steps:
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Are GPUs recognized by our DL frameworks
working-directory: /transformers
run: |
utils/print_env_pt.py
TF_CPP_MIN_LOG_LEVEL=3 python3 -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
TF_CPP_MIN_LOG_LEVEL=3 python3 -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
- name: Echo folder ${{ matrix.folders }} - name: Echo folder ${{ matrix.folders }}
shell: bash shell: bash
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
...@@ -173,6 +160,15 @@ jobs: ...@@ -173,6 +160,15 @@ jobs:
working-directory: /transformers working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }} run: git fetch && git checkout ${{ github.sha }}
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Run all non-slow selected tests on GPU - name: Run all non-slow selected tests on GPU
env: env:
MKL_SERVICE_FORCE_INTEL: 1 MKL_SERVICE_FORCE_INTEL: 1
...@@ -210,19 +206,19 @@ jobs: ...@@ -210,19 +206,19 @@ jobs:
with: with:
fetch-depth: 2 fetch-depth: 2
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Install dependencies - name: Install dependencies
run: | run: |
apt -y update && apt install -y libaio-dev apt -y update && apt install -y libaio-dev
pip install --upgrade pip pip install --upgrade pip
pip install .[deepspeed-testing] pip install .[deepspeed-testing]
- name: Are GPUs recognized by our DL frameworks - name: NVIDIA-SMI
run: |
nvidia-smi
- name: Environment
run: | run: |
utils/print_env_pt.py python utils/print_env.py
- name: Run all non-slow selected tests on GPU - name: Run all non-slow selected tests on GPU
# TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests. # TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests.
...@@ -259,10 +255,6 @@ jobs: ...@@ -259,10 +255,6 @@ jobs:
with: with:
fetch-depth: 2 fetch-depth: 2
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Install dependencies - name: Install dependencies
run: | run: |
apt -y update && apt install -y libaio-dev apt -y update && apt install -y libaio-dev
...@@ -270,9 +262,13 @@ jobs: ...@@ -270,9 +262,13 @@ jobs:
rm -rf ~/.cache/torch_extensions/ # shared between conflicting builds rm -rf ~/.cache/torch_extensions/ # shared between conflicting builds
pip install .[testing,deepspeed,fairscale] pip install .[testing,deepspeed,fairscale]
- name: Are GPUs recognized by our DL frameworks - name: NVIDIA-SMI
run: |
nvidia-smi
- name: Environment
run: | run: |
utils/print_env_pt.py python utils/print_env.py
- name: Run all non-slow selected tests on GPU - name: Run all non-slow selected tests on GPU
# TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests. # TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests.
......
...@@ -56,13 +56,6 @@ jobs: ...@@ -56,13 +56,6 @@ jobs:
run: | run: |
nvidia-smi nvidia-smi
- name: GPU visibility
working-directory: /transformers
run: |
utils/print_env_pt.py
TF_CPP_MIN_LOG_LEVEL=3 python3 -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
TF_CPP_MIN_LOG_LEVEL=3 python3 -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
run_tests_single_gpu: run_tests_single_gpu:
name: Model tests name: Model tests
strategy: strategy:
...@@ -91,6 +84,15 @@ jobs: ...@@ -91,6 +84,15 @@ jobs:
working-directory: /transformers working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }} run: git fetch && git checkout ${{ github.sha }}
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Run all tests on GPU - name: Run all tests on GPU
working-directory: /transformers working-directory: /transformers
run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }} run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
...@@ -135,6 +137,15 @@ jobs: ...@@ -135,6 +137,15 @@ jobs:
working-directory: /transformers working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }} run: git fetch && git checkout ${{ github.sha }}
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Run all tests on GPU - name: Run all tests on GPU
working-directory: /transformers working-directory: /transformers
run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }} run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
...@@ -163,6 +174,15 @@ jobs: ...@@ -163,6 +174,15 @@ jobs:
working-directory: /transformers working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }} run: git fetch && git checkout ${{ github.sha }}
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Run examples tests on GPU - name: Run examples tests on GPU
working-directory: /transformers working-directory: /transformers
run: | run: |
...@@ -197,6 +217,15 @@ jobs: ...@@ -197,6 +217,15 @@ jobs:
working-directory: /transformers working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }} run: git fetch && git checkout ${{ github.sha }}
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Run all pipeline tests on GPU - name: Run all pipeline tests on GPU
working-directory: /transformers working-directory: /transformers
env: env:
...@@ -233,6 +262,15 @@ jobs: ...@@ -233,6 +262,15 @@ jobs:
run: | run: |
git fetch && git checkout ${{ github.sha }} git fetch && git checkout ${{ github.sha }}
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Run all pipeline tests on GPU - name: Run all pipeline tests on GPU
working-directory: /transformers working-directory: /transformers
env: env:
...@@ -276,6 +314,15 @@ jobs: ...@@ -276,6 +314,15 @@ jobs:
git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build
DS_BUILD_CPU_ADAM=1 DS_BUILD_AIO=1 DS_BUILD_UTILS=1 python3 -m pip install -e . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check DS_BUILD_CPU_ADAM=1 DS_BUILD_AIO=1 DS_BUILD_UTILS=1 python3 -m pip install -e . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Environment
working-directory: /workspace/transformers
run: |
python utils/print_env.py
- name: Run all tests on GPU - name: Run all tests on GPU
working-directory: /workspace/transformers working-directory: /workspace/transformers
run: | run: |
...@@ -293,7 +340,6 @@ jobs: ...@@ -293,7 +340,6 @@ jobs:
name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports
path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu
send_results: send_results:
name: Send results to webhook name: Send results to webhook
runs-on: ubuntu-latest runs-on: ubuntu-latest
......
...@@ -17,12 +17,41 @@ ...@@ -17,12 +17,41 @@
# this script dumps information about the environment # this script dumps information about the environment
import torch import os
import sys
import transformers
print("Torch version:", torch.__version__)
print("Cuda available:", torch.cuda.is_available()) os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
print("Cuda version:", torch.version.cuda)
print("CuDNN version:", torch.backends.cudnn.version()) print("Python version:", sys.version)
print("Number of GPUs available:", torch.cuda.device_count()) print("transformers version:", transformers.__version__)
print("NCCL version:", torch.cuda.nccl.version())
try:
import torch
print("Torch version:", torch.__version__)
print("Cuda available:", torch.cuda.is_available())
print("Cuda version:", torch.version.cuda)
print("CuDNN version:", torch.backends.cudnn.version())
print("Number of GPUs available:", torch.cuda.device_count())
print("NCCL version:", torch.cuda.nccl.version())
except ImportError:
print("Torch version:", None)
try:
import deepspeed
print("DeepSpeed version:", deepspeed.__version__)
except ImportError:
print("DeepSpeed version:", None)
try:
import tensorflow as tf
print("TensorFlow version:", tf.__version__)
print("TF GPUs available:", bool(tf.config.list_physical_devices("GPU")))
print("Number of TF GPUs available:", len(tf.config.list_physical_devices("GPU")))
except ImportError:
print("TensorFlow version:", None)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment