Commit 898802fe (unverified), authored by Philip Meier, committed by GitHub
Browse files

fix scheduled download tests (#2706)



* fix triggers for scheduled workflow

* more fix

* add missing repository checkout

* try fix label in template

* rewrite test infrastructure

* trigger issue generation

* try fix issue template

* try remove quotes

* remove buggy label

* try fix title

* cleanup

* add more test details

* reenable issue creation
Co-authored-by: Francisco Massa <fvsmassa@gmail.com>
parent 9e7a4b19
--- ---
title: Scheduled workflow {{ env.WORKFLOW }}/{{ env.JOB }} failed title: Scheduled workflow failed
labels: bug, module: datasets labels:
- bug
- "module: datasets"
--- ---
Oh no, something went wrong in the scheduled workflow {{ env.WORKFLOW }}/{{ env.JOB }}. Oh no, something went wrong in the scheduled workflow {{ env.WORKFLOW }}/{{ env.JOB }}.
......
...@@ -2,6 +2,7 @@ name: tests ...@@ -2,6 +2,7 @@ name: tests
on: on:
pull_request: pull_request:
paths:
- "test/test_datasets_download.py" - "test/test_datasets_download.py"
- ".github/failed_schedule_issue_template.md" - ".github/failed_schedule_issue_template.md"
- ".github/workflows/tests-schedule.yml" - ".github/workflows/tests-schedule.yml"
...@@ -22,20 +23,23 @@ jobs: ...@@ -22,20 +23,23 @@ jobs:
- name: Upgrade pip - name: Upgrade pip
run: python -m pip install --upgrade pip run: python -m pip install --upgrade pip
- name: Checkout repository
uses: actions/checkout@v2
- name: Install PyTorch from the nightlies - name: Install PyTorch from the nightlies
run: | run: |
pip install numpy pip install numpy
pip install --pre torch torchvision -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html pip install --pre torch torchvision -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
- name: Install tests requirements - name: Install tests requirements
run: pip install pytest pytest-subtests run: pip install pytest
- name: Run tests - name: Run tests
run: pytest test/test_datasets_download.py run: pytest --durations=20 -ra test/test_datasets_download.py
- uses: JasonEtco/create-an-issue@v2.4.0 - uses: JasonEtco/create-an-issue@v2.4.0
name: Create issue if download tests failed name: Create issue if download tests failed
if: failure() if: failure() && github.event_name == 'schedule'
env: env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
REPO: ${{ github.repository }} REPO: ${{ github.repository }}
......
import contextlib import contextlib
import itertools import itertools
import time import time
import unittest
import unittest.mock import unittest.mock
from datetime import datetime from datetime import datetime
from os import path from os import path
from urllib.parse import urlparse from urllib.parse import urlparse
from urllib.request import urlopen, Request from urllib.request import urlopen, Request
import pytest
from torchvision import datasets from torchvision import datasets
from torchvision.datasets.utils import download_url, check_integrity from torchvision.datasets.utils import download_url, check_integrity
...@@ -43,14 +44,10 @@ def limit_requests_per_time(min_secs_between_requests=2.0): ...@@ -43,14 +44,10 @@ def limit_requests_per_time(min_secs_between_requests=2.0):
urlopen = limit_requests_per_time()(urlopen) urlopen = limit_requests_per_time()(urlopen)
class DownloadTester(unittest.TestCase): @contextlib.contextmanager
@staticmethod def log_download_attempts(patch=True):
@contextlib.contextmanager
def log_download_attempts(patch=True):
urls_and_md5s = set() urls_and_md5s = set()
with unittest.mock.patch( with unittest.mock.patch("torchvision.datasets.utils.download_url", wraps=None if patch else download_url) as mock:
"torchvision.datasets.utils.download_url", wraps=None if patch else download_url
) as mock:
try: try:
yield urls_and_md5s yield urls_and_md5s
finally: finally:
...@@ -59,8 +56,8 @@ class DownloadTester(unittest.TestCase): ...@@ -59,8 +56,8 @@ class DownloadTester(unittest.TestCase):
md5 = args[-1] if len(args) == 4 else kwargs.get("md5") md5 = args[-1] if len(args) == 4 else kwargs.get("md5")
urls_and_md5s.add((url, md5)) urls_and_md5s.add((url, md5))
@staticmethod
def retry(fn, times=1, wait=5.0): def retry(fn, times=1, wait=5.0):
msgs = [] msgs = []
for _ in range(times + 1): for _ in range(times + 1):
try: try:
...@@ -78,54 +75,63 @@ class DownloadTester(unittest.TestCase): ...@@ -78,54 +75,63 @@ class DownloadTester(unittest.TestCase):
) )
) )
@staticmethod
def assert_response_ok(response, url=None, ok=200): def assert_server_response_ok(response, url=None):
msg = f"The server returned status code {response.code}" msg = f"The server returned status code {response.code}"
if url is not None: if url is not None:
msg += f"for the the URL {url}" msg += f"for the the URL {url}"
assert response.code == ok, msg assert 200 <= response.code < 300, msg
@staticmethod
def assert_is_downloadable(url): def assert_url_is_accessible(url):
request = Request(url, headers=dict(method="HEAD")) request = Request(url, headers=dict(method="HEAD"))
response = urlopen(request) response = urlopen(request)
DownloadTester.assert_response_ok(response, url) assert_server_response_ok(response, url)
@staticmethod def assert_file_downloads_correctly(url, md5):
def assert_downloads_correctly(url, md5):
with get_tmp_dir() as root: with get_tmp_dir() as root:
file = path.join(root, path.basename(url)) file = path.join(root, path.basename(url))
with urlopen(url) as response, open(file, "wb") as fh: with urlopen(url) as response, open(file, "wb") as fh:
DownloadTester.assert_response_ok(response, url) assert_server_response_ok(response, url)
fh.write(response.read()) fh.write(response.read())
assert check_integrity(file, md5=md5), "The MD5 checksums mismatch" assert check_integrity(file, md5=md5), "The MD5 checksums mismatch"
def test_download(self):
assert_fn = (
lambda url, _: self.assert_is_downloadable(url)
if self.only_test_downloadability
else self.assert_downloads_correctly
)
for url, md5 in self.collect_urls_and_md5s():
with self.subTest(url=url, md5=md5):
self.retry(lambda: assert_fn(url, md5))
def collect_urls_and_md5s(self): class DownloadConfig:
raise NotImplementedError def __init__(self, url, md5=None, id=None):
self.url = url
self.md5 = md5
self.id = id or url
@property
def only_test_downloadability(self):
return True
def make_parametrize_kwargs(download_configs):
argvalues = []
ids = []
for config in download_configs:
argvalues.append((config.url, config.md5))
ids.append(config.id)
class Places365Tester(DownloadTester): return dict(argnames="url, md5", argvalues=argvalues, ids=ids)
def collect_urls_and_md5s(self):
with self.log_download_attempts(patch=False) as urls_and_md5s:
def places365():
with log_download_attempts(patch=False) as urls_and_md5s:
for split, small in itertools.product(("train-standard", "train-challenge", "val"), (False, True)): for split, small in itertools.product(("train-standard", "train-challenge", "val"), (False, True)):
with places365_root(split=split, small=small) as places365: with places365_root(split=split, small=small) as places365:
root, data = places365 root, data = places365
datasets.Places365(root, split=split, small=small, download=True) datasets.Places365(root, split=split, small=small, download=True)
return urls_and_md5s return [DownloadConfig(url, md5=md5, id=f"Places365, {url}") for url, md5 in urls_and_md5s]
@pytest.mark.parametrize(**make_parametrize_kwargs(itertools.chain(places365(),)))
def test_url_is_accessible(url, md5):
retry(lambda: assert_url_is_accessible(url))
@pytest.mark.parametrize(**make_parametrize_kwargs(itertools.chain()))
def test_file_downloads_correctly(url, md5):
retry(lambda: assert_file_downloads_correctly(url, md5))
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment