fix scheduled download tests (#2706)

* fix triggers for scheduled workflow
* more fix
* add missing repository checkout
* try fix label in template
* rewrite test infrastructure
* trigger issue generation
* try fix issue template
* try remove quotes
* remove buggy label
* try fix title
* cleanup
* add more test details
* reenable issue creation

Co-authored-by: Francisco Massa <fvsmassa@gmail.com>

fix scheduled download tests (#2706)
* fix triggers for scheduled workflow
* more fix
* add missing repository checkout
* try fix label in template
* rewrite test infrastructure
* trigger issue generation
* try fix issue template
* try remove quotes
* remove buggy label
* try fix title
* cleanup
* add more test details
* reenable issue creation

Co-authored-by: Francisco Massa <fvsmassa@gmail.com>
898802fe · Philip Meier · GitHub · 9e7a4b19 · 898802fe · 898802fe
Unverified Commit 898802fe authored Sep 28, 2020 by Philip Meier Committed by GitHub Sep 28, 2020
3 changed files
--- a/.github/failed_schedule_issue_template.md
+++ b/.github/failed_schedule_issue_template.md
 ---
-title: Scheduled workflow {{ env.WORKFLOW }}/{{ env.JOB }} failed
-labels: bug, module: datasets
+title: Scheduled workflow failed
+labels:
+  - bug
+  - "module: datasets"
 ---

 Oh no, something went wrong in the scheduled workflow {{ env.WORKFLOW }}/{{ env.JOB }}. 

--- a/.github/workflows/tests-schedule.yml
+++ b/.github/workflows/tests-schedule.yml
@@ -2,6 +2,7 @@ name: tests

 on:
  pull_request:
+    paths:
      - "test/test_datasets_download.py"
      - ".github/failed_schedule_issue_template.md"
      - ".github/workflows/tests-schedule.yml"
@@ -22,20 +23,23 @@ jobs:
      - name: Upgrade pip
        run: python -m pip install --upgrade pip

+      - name: Checkout repository
+        uses: actions/checkout@v2
+
      - name: Install PyTorch from the nightlies
        run: |
          pip install numpy
          pip install --pre torch torchvision -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html

      - name: Install tests requirements
-        run: pip install pytest pytest-subtests
+        run: pip install pytest

      - name: Run tests
-        run: pytest test/test_datasets_download.py
+        run: pytest --durations=20 -ra test/test_datasets_download.py

      - uses: JasonEtco/create-an-issue@v2.4.0
        name: Create issue if download tests failed
-        if: failure()
+        if: failure() && github.event_name == 'schedule'
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          REPO: ${{ github.repository }}

--- a/test/test_datasets_download.py
+++ b/test/test_datasets_download.py
 import contextlib
 import itertools
 import time
-import unittest
 import unittest.mock
 from datetime import datetime
 from os import path
 from urllib.parse import urlparse
 from urllib.request import urlopen, Request

+import pytest
+
 from torchvision import datasets
 from torchvision.datasets.utils import download_url, check_integrity

@@ -43,14 +44,10 @@ def limit_requests_per_time(min_secs_between_requests=2.0):
 urlopen = limit_requests_per_time()(urlopen)


-class DownloadTester(unittest.TestCase):
-    @staticmethod
-    @contextlib.contextmanager
-    def log_download_attempts(patch=True):
+@contextlib.contextmanager
+def log_download_attempts(patch=True):
    urls_and_md5s = set()
-        with unittest.mock.patch(
-            "torchvision.datasets.utils.download_url", wraps=None if patch else download_url
-        ) as mock:
+    with unittest.mock.patch("torchvision.datasets.utils.download_url", wraps=None if patch else download_url) as mock:
        try:
            yield urls_and_md5s
        finally:
@@ -59,8 +56,8 @@ class DownloadTester(unittest.TestCase):
                md5 = args[-1] if len(args) == 4 else kwargs.get("md5")
                urls_and_md5s.add((url, md5))

-    @staticmethod
-    def retry(fn, times=1, wait=5.0):
+
+def retry(fn, times=1, wait=5.0):
    msgs = []
    for _ in range(times + 1):
        try:
@@ -78,54 +75,63 @@ class DownloadTester(unittest.TestCase):
            )
        )

-    @staticmethod
-    def assert_response_ok(response, url=None, ok=200):
+
+def assert_server_response_ok(response, url=None):
    msg = f"The server returned status code {response.code}"
    if url is not None:
        msg += f"for the the URL {url}"
-        assert response.code == ok, msg
+    assert 200 <= response.code < 300, msg

-    @staticmethod
-    def assert_is_downloadable(url):
+
+def assert_url_is_accessible(url):
    request = Request(url, headers=dict(method="HEAD"))
    response = urlopen(request)
-        DownloadTester.assert_response_ok(response, url)
+    assert_server_response_ok(response, url)
+

-    @staticmethod
-    def assert_downloads_correctly(url, md5):
+def assert_file_downloads_correctly(url, md5):
    with get_tmp_dir() as root:
        file = path.join(root, path.basename(url))
        with urlopen(url) as response, open(file, "wb") as fh:
-                DownloadTester.assert_response_ok(response, url)
+            assert_server_response_ok(response, url)
            fh.write(response.read())

        assert check_integrity(file, md5=md5), "The MD5 checksums mismatch"

-    def test_download(self):
-        assert_fn = (
-            lambda url, _: self.assert_is_downloadable(url)
-            if self.only_test_downloadability
-            else self.assert_downloads_correctly
-        )
-        for url, md5 in self.collect_urls_and_md5s():
-            with self.subTest(url=url, md5=md5):
-                self.retry(lambda: assert_fn(url, md5))

-    def collect_urls_and_md5s(self):
-        raise NotImplementedError
+class DownloadConfig:
+    def __init__(self, url, md5=None, id=None):
+        self.url = url
+        self.md5 = md5
+        self.id = id or url

-    @property
-    def only_test_downloadability(self):
-        return True

+def make_parametrize_kwargs(download_configs):
+    argvalues = []
+    ids = []
+    for config in download_configs:
+        argvalues.append((config.url, config.md5))
+        ids.append(config.id)

-class Places365Tester(DownloadTester):
-    def collect_urls_and_md5s(self):
-        with self.log_download_attempts(patch=False) as urls_and_md5s:
+    return dict(argnames="url, md5", argvalues=argvalues, ids=ids)
+
+
+def places365():
+    with log_download_attempts(patch=False) as urls_and_md5s:
        for split, small in itertools.product(("train-standard", "train-challenge", "val"), (False, True)):
            with places365_root(split=split, small=small) as places365:
                root, data = places365

                datasets.Places365(root, split=split, small=small, download=True)

-        return urls_and_md5s
+    return [DownloadConfig(url, md5=md5, id=f"Places365, {url}") for url, md5 in urls_and_md5s]
+
+
+@pytest.mark.parametrize(**make_parametrize_kwargs(itertools.chain(places365(),)))
+def test_url_is_accessible(url, md5):
+    retry(lambda: assert_url_is_accessible(url))
+
+
+@pytest.mark.parametrize(**make_parametrize_kwargs(itertools.chain()))
+def test_file_downloads_correctly(url, md5):
+    retry(lambda: assert_file_downloads_correctly(url, md5))