Unverified Commit 814c4f08 authored by Eli Uriegas's avatar Eli Uriegas Committed by GitHub
Browse files

datasets: Fallback to our own mirrors for mnist (#3544)



We are experiencing 403s when trying to download from the main mnist
site so lets fallback to our own mirror on failure.
Signed-off-by: default avatarEli Uriegas <eliuriegas@fb.com>
Co-authored-by: default avatarFrancisco Massa <fvsmassa@gmail.com>
parent 6f062c95
...@@ -10,6 +10,7 @@ import string ...@@ -10,6 +10,7 @@ import string
import gzip import gzip
import lzma import lzma
from typing import Any, Callable, Dict, IO, List, Optional, Tuple, Union from typing import Any, Callable, Dict, IO, List, Optional, Tuple, Union
from urllib.error import URLError
from .utils import download_url, download_and_extract_archive, extract_archive, \ from .utils import download_url, download_and_extract_archive, extract_archive, \
verify_str_arg verify_str_arg
...@@ -31,11 +32,16 @@ class MNIST(VisionDataset): ...@@ -31,11 +32,16 @@ class MNIST(VisionDataset):
target and transforms it. target and transforms it.
""" """
mirrors = [
'http://yann.lecun.com/exdb/mnist/',
'https://ossci-datasets.s3.amazonaws.com/mnist/',
]
resources = [ resources = [
("http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz", "f68b3c2dcbeaaa9fbdd348bbdeb94873"), ("train-images-idx3-ubyte.gz", "f68b3c2dcbeaaa9fbdd348bbdeb94873"),
("http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz", "d53e105ee54ea40749a09fcbcd1e9432"), ("train-labels-idx1-ubyte.gz", "d53e105ee54ea40749a09fcbcd1e9432"),
("http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz", "9fb629c4189551a2d022fa330f9573f3"), ("t10k-images-idx3-ubyte.gz", "9fb629c4189551a2d022fa330f9573f3"),
("http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz", "ec29112dd5afa0611ce80d1b7f02629c") ("t10k-labels-idx1-ubyte.gz", "ec29112dd5afa0611ce80d1b7f02629c")
] ]
training_file = 'training.pt' training_file = 'training.pt'
...@@ -141,9 +147,26 @@ class MNIST(VisionDataset): ...@@ -141,9 +147,26 @@ class MNIST(VisionDataset):
os.makedirs(self.processed_folder, exist_ok=True) os.makedirs(self.processed_folder, exist_ok=True)
# download files # download files
for url, md5 in self.resources: for filename, md5 in self.resources:
filename = url.rpartition('/')[2] for mirror in self.mirrors:
download_and_extract_archive(url, download_root=self.raw_folder, filename=filename, md5=md5) url = "{}{}".format(mirror, filename)
try:
print("Downloading {}".format(url))
download_and_extract_archive(
url, download_root=self.raw_folder,
filename=filename,
md5=md5
)
except URLError as error:
print(
"Failed to download (trying next):\n{}".format(error)
)
continue
finally:
print()
break
else:
raise RuntimeError("Error downloading {}".format(filename))
# process and save as torch files # process and save as torch files
print('Processing...') print('Processing...')
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment