Unverified Commit 96f5ccea authored by Yifan Xiong's avatar Yifan Xiong Committed by GitHub
Browse files

CI/CD - Upgrade dependency versions in pipeline (#671)



Upgrade dependency versions in Azure pipeline:

* Remove Python 3.6 and add Python 3.10 for cpu-unit-test
* Upgrade CUDA from 11.1 to 12.4 for cuda-unit-test
* Update labels accordingly

---------
Co-authored-by: Dilip Patlolla <dilipreddi@gmail.com>
parent 7cef624e
......@@ -7,6 +7,7 @@ trigger:
pool:
name: SuperBench CI
demands: ansible-agent
vmImage: ubuntu-latest
container:
......
......@@ -7,12 +7,12 @@ trigger:
strategy:
matrix:
python-3.6:
imageTag: '3.6'
python-3.7:
imageTag: '3.7'
python-3.8:
imageTag: '3.8'
python-3.10:
imageTag: '3.10'
# TODO
#python-latest:
# imageTag: '3'
......
......@@ -7,22 +7,26 @@ trigger:
pool:
name: SuperBench CI
demands: cuda-agent
vmImage: ubuntu-latest
container:
image: nvcr.io/nvidia/pytorch:20.12-py3
options: '-v /var/run/docker.sock:/var/run/docker.sock -v /usr/bin/docker:/usr/bin/docker -v /usr/bin/sudo:/usr/bin/sudo -v /usr/lib/sudo/:/usr/lib/sudo/'
image: nvcr.io/nvidia/pytorch:24.03-py3
options: '--name cuda-ci -v /var/run/docker.sock:/var/run/docker.sock -v /usr/bin/docker:/usr/bin/docker:ro'
steps:
- script: |
echo "##vso[task.prependpath]$HOME/.local/bin"
displayName: Export path
- script: |
docker exec -t -u root -e DEBIAN_FRONTEND=noninteractive cuda-ci bash -c \
"apt-get update -y -q && \
yes '' | apt-get install -y -q sudo && \
apt-get install -y -q \
ffmpeg libavcodec-dev libavformat-dev libavutil-dev libboost-program-options-dev libswresample-dev"
python3 -m pip install --upgrade pip setuptools==65.7
python3 -m pip install .[test,nvworker]
make postinstall
sudo DEBIAN_FRONTEND=noninteractive apt-get update
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y ffmpeg libavcodec-dev libavformat-dev libavutil-dev libswresample-dev
displayName: Install dependencies
- script: |
python3 setup.py lint
......
......@@ -14,8 +14,9 @@ coverage:
target: 80%
threshold: 1%
flags:
- cpu-python3.6-unit-test
- cpu-python3.7-unit-test
- cpu-python3.8-unit-test
- cpu-python3.10-unit-test
- cuda-unit-test
- directx-unit-test
patch:
......@@ -23,7 +24,8 @@ coverage:
target: 80%
threshold: 1%
flags:
- cpu-python3.6-unit-test
- cpu-python3.7-unit-test
- cpu-python3.8-unit-test
- cpu-python3.10-unit-test
- cuda-unit-test
- directx-unit-test
......@@ -26,7 +26,7 @@ Here're the system requirements for control node.
### Requirements
* A recent version of Linux; you are highly encouraged to use Ubuntu 18.04 or later.
* [Python](https://www.python.org/) version 3.6 or later (which can be checked by running `python3 --version`).
* [Python](https://www.python.org/) version 3.7 or later (which can be checked by running `python3 --version`).
* [Pip](https://pip.pypa.io/en/stable/installing/) version 18.0 or later (which can be checked by running `python3 -m pip --version`).
:::note
......
......@@ -131,17 +131,17 @@ def run(self):
'Operating System :: POSIX',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3 :: Only',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
'Topic :: System :: Benchmark',
'Topic :: System :: Clustering',
'Topic :: System :: Hardware',
],
keywords='benchmark, AI systems',
packages=find_packages(exclude=['tests']),
python_requires='>=3.6, <4',
python_requires='>=3.7, <4',
use_scm_version={
'local_scheme': 'node-and-date',
'version_scheme': lambda _: superbench.__version__,
......
......@@ -48,6 +48,8 @@ def __init__(self, name, parameters=''):
allow_abbrev=False,
formatter_class=SortedMetavarTypeHelpFormatter,
)
# Fix optionals title in Python 3.10
self._parser._optionals.title = 'optional arguments'
self._args = None
self._curr_run_index = 0
self._result = None
......
......@@ -4,7 +4,7 @@
"""Tests for SummaryOp module."""
import unittest
from numpy import NaN, float64
from numpy import nan, float64
import pandas as pd
......@@ -55,7 +55,7 @@ def test_rule_op(self):
# Test - std
result = SummaryOp.std(raw_data_df)
print(result)
expectedResult = pd.Series([3.0, 3.0, 2.1213203435596424, NaN], index=['a', 'b', 'c', 'd'], dtype=float64)
expectedResult = pd.Series([3.0, 3.0, 2.1213203435596424, nan], index=['a', 'b', 'c', 'd'], dtype=float64)
pd.testing.assert_series_equal(result, expectedResult)
# Test - count
result = SummaryOp.count(raw_data_df)
......
......@@ -250,16 +250,35 @@ def test_pytorch_empty_cache():
# Register mnist benchmark.
BenchmarkRegistry.register_benchmark('pytorch-mnist', PytorchMNIST)
# Get initial memory reserved
init_res_memory = torch.cuda.memory_reserved()
# Test cache empty by manually calling torch.cuda.empty_cache().
parameters = '--batch_size 32 --num_warmup 8 --num_steps 64 --model_action train'
benchmark = PytorchMNIST('pytorch-mnist', parameters=parameters)
assert (benchmark)
assert (benchmark._preprocess())
assert (benchmark._benchmark())
del benchmark
assert (torch.cuda.memory_stats()['reserved_bytes.all.current'] > 0)
# Get current reserved memory after benchmark
post_bm_res_memory = torch.cuda.memory_reserved()
# Assert that memory is increased after benchmark
assert (post_bm_res_memory >= init_res_memory)
# Manually empty cache and get reserved memory
# Calling empty_cache() releases all unused cached memory from PyTorch so that those can be used by
# other GPU applications. However, the occupied GPU memory by tensors will not be freed so it can not
# increase the amount of GPU memory available for PyTorch.
# https://pytorch.org/docs/stable/notes/cuda.html#cuda-memory-management
torch.cuda.empty_cache()
assert (torch.cuda.memory_stats()['reserved_bytes.all.current'] == 0)
post_empty_cache_res_memory = torch.cuda.memory_reserved()
# Assert that some memory is released after manually empty cache. The cache is not guaranteed to be reset
# back to the init_res_memory due to some tensors not being released.
assert (post_empty_cache_res_memory <= post_bm_res_memory)
# Test automatic cache empty.
context = BenchmarkRegistry.create_benchmark_context(
......@@ -268,4 +287,4 @@ def test_pytorch_empty_cache():
benchmark = BenchmarkRegistry.launch_benchmark(context)
assert (benchmark)
assert (torch.cuda.memory_stats()['reserved_bytes.all.current'] == 0)
assert (torch.cuda.memory_reserved() == post_empty_cache_res_memory)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment