Added dataset download support in fbcode (#3823) (#3826)

Summary:
Pull Request resolved: https://github.com/pytorch/vision/pull/3823

Uploaded the FashionMNIST dataset to the [manifold](https://www.internalfb.com/intern/network/manifold/?bucket=torchvision&path=tree%2Fdatasets) bucket `torchvision`. Any new dataset that needs to be added can be uploaded under `tree/datasets/<dataset_name>`.

Reviewed By: datumbox
Differential Revision: D28358470
fbshipit-source-id: 6f2282d3f1ce4b1416e962de8fb132896d4b2d76

Added dataset download support in fbcode (#3823) (#3826)
Summary:
Pull Request resolved: https://github.com/pytorch/vision/pull/3823

Uploaded the FashionMNIST dataset to the [manifold](https://www.internalfb.com/intern/network/manifold/?bucket=torchvision&path=tree%2Fdatasets) bucket `torchvision`. Any new dataset that needs to be added can be uploaded under `tree/datasets/<dataset_name>`.

Reviewed By: datumbox
Differential Revision: D28358470
fbshipit-source-id: 6f2282d3f1ce4b1416e962de8fb132896d4b2d76
f5aa5f58 · Prabhat Roy · GitHub · 0fd0f503 · f5aa5f58 · f5aa5f58
Unverified Commit f5aa5f58 authored May 13, 2021 by Prabhat Roy Committed by GitHub May 13, 2021
Hide whitespace changes
Inline Side-by-side

Showing with 32 additions and 17 deletions

torchvision/datasets/_utils.py torchvision/datasets/_utils.py +6 -0

torchvision/datasets/utils.py torchvision/datasets/utils.py +26 -17

No files found.
--- a/torchvision/datasets/_utils.py
+++ b/torchvision/datasets/_utils.py
+def _download_file_from_remote_location(fpath: str) -> None:
+    pass
+def _is_remote_location_available() -> bool:
+    return False
--- a/torchvision/datasets/utils.py
+++ b/torchvision/datasets/utils.py
@@ -17,6 +17,11 @@ import pathlib
 import torch
 from torch.utils.model_zoo import tqdm
+from ._utils import (
+    _download_file_from_remote_location,
+    _is_remote_location_available,
+)
 USER_AGENT = "pytorch/vision"
@@ -117,26 +122,30 @@ def download_url(
        print('Using downloaded and verified file: ' + fpath)
        return
-    # expand redirect chain if needed
+    if _is_remote_location_available():
-    url = _get_redirect_url(url, max_hops=max_redirect_hops)
+        _download_file_from_remote_location(fpath)
+    else:
+        # expand redirect chain if needed
+        url = _get_redirect_url(url, max_hops=max_redirect_hops)
-    # check if file is located on Google Drive
+        # check if file is located on Google Drive
-    file_id = _get_google_drive_file_id(url)
+        file_id = _get_google_drive_file_id(url)
-    if file_id is not None:
+        if file_id is not None:
-        return download_file_from_google_drive(file_id, root, filename, md5)
+            return download_file_from_google_drive(file_id, root, filename, md5)
-    # download the file
+        # download the file
-    try:
+        try:
-        print('Downloading ' + url + ' to ' + fpath)
+            print('Downloading ' + url + ' to ' + fpath)
-        _urlretrieve(url, fpath)
-    except (urllib.error.URLError, IOError) as e:  # type: ignore[attr-defined]
-        if url[:5] == 'https':
-            url = url.replace('https:', 'http:')
-            print('Failed download. Trying https -> http instead.'
-                  ' Downloading ' + url + ' to ' + fpath)
            _urlretrieve(url, fpath)
-        else:
+        except (urllib.error.URLError, IOError) as e:  # type: ignore[attr-defined]
-            raise e
+            if url[:5] == 'https':
+                url = url.replace('https:', 'http:')
+                print('Failed download. Trying https -> http instead.'
+                      ' Downloading ' + url + ' to ' + fpath)
+                _urlretrieve(url, fpath)
+            else:
+                raise e
    # check integrity of downloaded file
    if not check_integrity(fpath, md5):
        raise RuntimeError("File not found or corrupted.")