Unverified Commit 898802fe authored by Philip Meier's avatar Philip Meier Committed by GitHub
Browse files

fix scheduled download tests (#2706)



* fix triggers for scheduled workflow

* more fix

* add missing repository checkout

* try fix label in template

* rewrite test infrastructure

* trigger issue generation

* try fix issue template

* try remove quotes

* remove buggy label

* try fix title

* cleanup

* add more test details

* reenable issue creation
Co-authored-by: Francisco Massa <fvsmassa@gmail.com>
parent 9e7a4b19
---
title: Scheduled workflow {{ env.WORKFLOW }}/{{ env.JOB }} failed
labels: bug, module: datasets
title: Scheduled workflow failed
labels:
- bug
- "module: datasets"
---
Oh no, something went wrong in the scheduled workflow {{ env.WORKFLOW }}/{{ env.JOB }}.
......
......@@ -2,6 +2,7 @@ name: tests
on:
pull_request:
paths:
- "test/test_datasets_download.py"
- ".github/failed_schedule_issue_template.md"
- ".github/workflows/tests-schedule.yml"
......@@ -22,20 +23,23 @@ jobs:
- name: Upgrade pip
run: python -m pip install --upgrade pip
- name: Checkout repository
uses: actions/checkout@v2
- name: Install PyTorch from the nightlies
run: |
pip install numpy
pip install --pre torch torchvision -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
- name: Install tests requirements
run: pip install pytest pytest-subtests
run: pip install pytest
- name: Run tests
run: pytest test/test_datasets_download.py
run: pytest --durations=20 -ra test/test_datasets_download.py
- uses: JasonEtco/create-an-issue@v2.4.0
name: Create issue if download tests failed
if: failure()
if: failure() && github.event_name == 'schedule'
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
REPO: ${{ github.repository }}
......
import contextlib
import itertools
import time
import unittest
import unittest.mock
from datetime import datetime
from os import path
from urllib.parse import urlparse
from urllib.request import urlopen, Request
import pytest
from torchvision import datasets
from torchvision.datasets.utils import download_url, check_integrity
......@@ -43,14 +44,10 @@ def limit_requests_per_time(min_secs_between_requests=2.0):
# Rebind the module-level ``urlopen`` to the wrapper produced by
# ``limit_requests_per_time`` (called with its default
# ``min_secs_between_requests=2.0``), so that every request issued through this
# name goes through that wrapper — presumably to avoid hammering the servers.
urlopen = limit_requests_per_time()(urlopen)
class DownloadTester(unittest.TestCase):
@staticmethod
@contextlib.contextmanager
def log_download_attempts(patch=True):
@contextlib.contextmanager
def log_download_attempts(patch=True):
urls_and_md5s = set()
with unittest.mock.patch(
"torchvision.datasets.utils.download_url", wraps=None if patch else download_url
) as mock:
with unittest.mock.patch("torchvision.datasets.utils.download_url", wraps=None if patch else download_url) as mock:
try:
yield urls_and_md5s
finally:
......@@ -59,8 +56,8 @@ class DownloadTester(unittest.TestCase):
md5 = args[-1] if len(args) == 4 else kwargs.get("md5")
urls_and_md5s.add((url, md5))
@staticmethod
def retry(fn, times=1, wait=5.0):
def retry(fn, times=1, wait=5.0):
msgs = []
for _ in range(times + 1):
try:
......@@ -78,54 +75,63 @@ class DownloadTester(unittest.TestCase):
)
)
def assert_server_response_ok(response, url=None):
    """Assert that ``response`` carries a successful (2xx) HTTP status code.

    Args:
        response: Object exposing the HTTP status code as ``response.code``.
        url: Optional URL that is appended to the failure message.

    Raises:
        AssertionError: If the status code lies outside ``[200, 300)``.
    """
    msg = f"The server returned status code {response.code}"
    if url is not None:
        # The leading space keeps the status code and the URL from running together.
        msg += f" for the URL {url}"

    assert 200 <= response.code < 300, msg
def assert_url_is_accessible(url):
    """Assert that the server answers a ``HEAD`` request for ``url`` with a 2xx status.

    Args:
        url: URL to probe.

    Raises:
        AssertionError: If the returned status code is not in the 2xx range.
    """
    # ``method`` must be given as the request method. Passing it inside ``headers``
    # only adds a header literally named "method" and the request stays a ``GET``.
    request = Request(url, method="HEAD")
    # Use the response as a context manager so the connection is always closed.
    with urlopen(request) as response:
        assert_server_response_ok(response, url)
def assert_file_downloads_correctly(url, md5):
    """Download ``url`` into a temporary directory and assert its MD5 checksum.

    Args:
        url: URL of the file to download.
        md5: Expected MD5 checksum, forwarded to ``check_integrity``.

    Raises:
        AssertionError: If the server does not answer with a 2xx status code or
            if ``check_integrity`` rejects the downloaded file.
    """
    # NOTE(review): assumes get_tmp_dir() yields the path of a directory that is
    # removed when the context exits — confirm against its definition.
    with get_tmp_dir() as root:
        file = path.join(root, path.basename(url))
        with urlopen(url) as response, open(file, "wb") as fh:
            assert_server_response_ok(response, url)
            # Stream in fixed-size chunks instead of holding the whole payload in
            # memory; ``read`` returns ``b""`` once the body is exhausted.
            for chunk in iter(lambda: response.read(1024 * 1024), b""):
                fh.write(chunk)

        # Verify only after the file handle is closed so all data is flushed.
        assert check_integrity(file, md5=md5), "The MD5 checksums mismatch"
def test_download(self):
assert_fn = (
lambda url, _: self.assert_is_downloadable(url)
if self.only_test_downloadability
else self.assert_downloads_correctly
)
for url, md5 in self.collect_urls_and_md5s():
with self.subTest(url=url, md5=md5):
self.retry(lambda: assert_fn(url, md5))
def collect_urls_and_md5s(self):
raise NotImplementedError
class DownloadConfig:
    """Description of a single download that should be tested.

    Args:
        url: URL of the file.
        md5: Optional MD5 checksum of the file.
        id: Optional identifier; falls back to ``url`` when omitted or empty.
    """

    def __init__(self, url, md5=None, id=None):
        self.url = url
        self.md5 = md5
        # A falsy ``id`` (``None`` or ``""``) falls back to the URL itself.
        self.id = id or url

    def __repr__(self):
        return f"{type(self).__name__}(url={self.url!r}, md5={self.md5!r}, id={self.id!r})"
@property
def only_test_downloadability(self):
return True
def make_parametrize_kwargs(download_configs):
    """Build the keyword arguments for ``pytest.mark.parametrize``.

    Args:
        download_configs: Iterable of objects exposing ``url``, ``md5`` and ``id``
            attributes. One-shot iterators are supported.

    Returns:
        Dictionary with the keys ``argnames``, ``argvalues`` and ``ids``. Each
        entry of ``argvalues`` is a ``(url, md5)`` tuple and ``ids`` holds the
        matching ``id`` values in the same order.
    """
    # Materialize first: the input may be an iterator (e.g. ``itertools.chain``)
    # that can only be consumed once, yet it is traversed twice below.
    configs = list(download_configs)
    return dict(
        argnames="url, md5",
        argvalues=[(config.url, config.md5) for config in configs],
        ids=[config.id for config in configs],
    )
def places365():
    """Collect the download configurations of ``datasets.Places365``.

    The dataset is instantiated with ``download=True`` for every combination of
    split and ``small`` flag while download attempts are being logged.

    Returns:
        List with one ``DownloadConfig`` per logged ``(url, md5)`` pair; the
        ``id`` of each entry is ``"Places365, <url>"``.
    """
    splits = ("train-standard", "train-challenge", "val")
    with log_download_attempts(patch=False) as urls_and_md5s:
        for split, small in itertools.product(splits, (False, True)):
            # Only the root directory is needed; the second item is unused.
            with places365_root(split=split, small=small) as (root, _):
                datasets.Places365(root, split=split, small=small, download=True)

    # Build the result only after the logging context has exited: the visible
    # part of log_download_attempts fills the set in its ``finally`` clause.
    return [DownloadConfig(url, md5=md5, id=f"Places365, {url}") for url, md5 in urls_and_md5s]
@pytest.mark.parametrize(**make_parametrize_kwargs(itertools.chain(places365(),)))
def test_url_is_accessible(url, md5):
    """Check, with retries, that ``url`` can be reached.

    ``md5`` is part of the parametrization but is not needed for this check.
    """

    def check():
        return assert_url_is_accessible(url)

    retry(check)
@pytest.mark.parametrize(**make_parametrize_kwargs(itertools.chain()))
def test_file_downloads_correctly(url, md5):
    """Check, with retries, that the file at ``url`` downloads and matches ``md5``.

    The parametrization is built from an empty chain, so no configurations are
    registered for this test here.
    """

    def check():
        return assert_file_downloads_correctly(url, md5)

    retry(check)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment