Commit b18e583e, authored by Joao Gomes and committed by Facebook GitHub Bot
Browse files

Deprecating data utils (#2073)

Summary:
- Deprecates data utils (with a warning that they will be removed in v0.12)
- Replaces all usages of `torchaudio.datasets.utils.download_url` with `torch.hub.download_url_to_file`
- Replaces all MD5 hashes with SHA256 hashes

Addresses https://github.com/pytorch/audio/issues/1883

Pull Request resolved: https://github.com/pytorch/audio/pull/2073

Reviewed By: mthrok

Differential Revision: D33241756

Pulled By: jdsgomes

fbshipit-source-id: 49388ec5965bfc91d9a1d8d0786eeafb2969f6cf
parent 575d221e
...@@ -6,8 +6,8 @@ from typing import Tuple, Union ...@@ -6,8 +6,8 @@ from typing import Tuple, Union
import torchaudio import torchaudio
from torch import Tensor from torch import Tensor
from torch.utils.data import Dataset from torch.utils.data import Dataset
from torch.hub import download_url_to_file
from torchaudio.datasets.utils import ( from torchaudio.datasets.utils import (
download_url,
extract_archive, extract_archive,
) )
...@@ -15,41 +15,41 @@ URL = "aew" ...@@ -15,41 +15,41 @@ URL = "aew"
FOLDER_IN_ARCHIVE = "ARCTIC" FOLDER_IN_ARCHIVE = "ARCTIC"
_CHECKSUMS = { _CHECKSUMS = {
"http://festvox.org/cmu_arctic/packed/cmu_us_aew_arctic.tar.bz2": "http://festvox.org/cmu_arctic/packed/cmu_us_aew_arctic.tar.bz2":
"4382b116efcc8339c37e01253cb56295", "645cb33c0f0b2ce41384fdd8d3db2c3f5fc15c1e688baeb74d2e08cab18ab406",
"http://festvox.org/cmu_arctic/packed/cmu_us_ahw_arctic.tar.bz2": "http://festvox.org/cmu_arctic/packed/cmu_us_ahw_arctic.tar.bz2":
"b072d6e961e3f36a2473042d097d6da9", "024664adeb892809d646a3efd043625b46b5bfa3e6189b3500b2d0d59dfab06c",
"http://festvox.org/cmu_arctic/packed/cmu_us_aup_arctic.tar.bz2": "http://festvox.org/cmu_arctic/packed/cmu_us_aup_arctic.tar.bz2":
"5301c7aee8919d2abd632e2667adfa7f", "2c55bc3050caa996758869126ad10cf42e1441212111db034b3a45189c18b6fc",
"http://festvox.org/cmu_arctic/packed/cmu_us_awb_arctic.tar.bz2": "http://festvox.org/cmu_arctic/packed/cmu_us_awb_arctic.tar.bz2":
"280fdff1e9857119d9a2c57b50e12db7", "d74a950c9739a65f7bfc4dfa6187f2730fa03de5b8eb3f2da97a51b74df64d3c",
"http://festvox.org/cmu_arctic/packed/cmu_us_axb_arctic.tar.bz2": "http://festvox.org/cmu_arctic/packed/cmu_us_axb_arctic.tar.bz2":
"5e21cb26c6529c533df1d02ccde5a186", "dd65c3d2907d1ee52f86e44f578319159e60f4bf722a9142be01161d84e330ff",
"http://festvox.org/cmu_arctic/packed/cmu_us_bdl_arctic.tar.bz2": "http://festvox.org/cmu_arctic/packed/cmu_us_bdl_arctic.tar.bz2":
"b2c3e558f656af2e0a65da0ac0c3377a", "26b91aaf48b2799b2956792b4632c2f926cd0542f402b5452d5adecb60942904",
"http://festvox.org/cmu_arctic/packed/cmu_us_clb_arctic.tar.bz2": "http://festvox.org/cmu_arctic/packed/cmu_us_clb_arctic.tar.bz2":
"3957c503748e3ce17a3b73c1b9861fb0", "3f16dc3f3b97955ea22623efb33b444341013fc660677b2e170efdcc959fa7c6",
"http://festvox.org/cmu_arctic/packed/cmu_us_eey_arctic.tar.bz2": "http://festvox.org/cmu_arctic/packed/cmu_us_eey_arctic.tar.bz2":
"59708e932d27664f9eda3e8e6859969b", "8a0ee4e5acbd4b2f61a4fb947c1730ab3adcc9dc50b195981d99391d29928e8a",
"http://festvox.org/cmu_arctic/packed/cmu_us_fem_arctic.tar.bz2": "http://festvox.org/cmu_arctic/packed/cmu_us_fem_arctic.tar.bz2":
"dba4f992ff023347c07c304bf72f4c73", "3fcff629412b57233589cdb058f730594a62c4f3a75c20de14afe06621ef45e2",
"http://festvox.org/cmu_arctic/packed/cmu_us_gka_arctic.tar.bz2": "http://festvox.org/cmu_arctic/packed/cmu_us_gka_arctic.tar.bz2":
"24a876ea7335c1b0ff21460e1241340f", "dc82e7967cbd5eddbed33074b0699128dbd4482b41711916d58103707e38c67f",
"http://festvox.org/cmu_arctic/packed/cmu_us_jmk_arctic.tar.bz2": "http://festvox.org/cmu_arctic/packed/cmu_us_jmk_arctic.tar.bz2":
"afb69d95f02350537e8a28df5ab6004b", "3a37c0e1dfc91e734fdbc88b562d9e2ebca621772402cdc693bbc9b09b211d73",
"http://festvox.org/cmu_arctic/packed/cmu_us_ksp_arctic.tar.bz2": "http://festvox.org/cmu_arctic/packed/cmu_us_ksp_arctic.tar.bz2":
"4ce5b3b91a0a54b6b685b1b05aa0b3be", "8029cafce8296f9bed3022c44ef1e7953332b6bf6943c14b929f468122532717",
"http://festvox.org/cmu_arctic/packed/cmu_us_ljm_arctic.tar.bz2": "http://festvox.org/cmu_arctic/packed/cmu_us_ljm_arctic.tar.bz2":
"6f45a3b2c86a4ed0465b353be291f77d", "b23993765cbf2b9e7bbc3c85b6c56eaf292ac81ee4bb887b638a24d104f921a0",
"http://festvox.org/cmu_arctic/packed/cmu_us_lnh_arctic.tar.bz2": "http://festvox.org/cmu_arctic/packed/cmu_us_lnh_arctic.tar.bz2":
"c6a15abad5c14d27f4ee856502f0232f", "4faf34d71aa7112813252fb20c5433e2fdd9a9de55a00701ffcbf05f24a5991a",
"http://festvox.org/cmu_arctic/packed/cmu_us_rms_arctic.tar.bz2": "http://festvox.org/cmu_arctic/packed/cmu_us_rms_arctic.tar.bz2":
"71072c983df1e590d9e9519e2a621f6e", "c6dc11235629c58441c071a7ba8a2d067903dfefbaabc4056d87da35b72ecda4",
"http://festvox.org/cmu_arctic/packed/cmu_us_rxr_arctic.tar.bz2": "http://festvox.org/cmu_arctic/packed/cmu_us_rxr_arctic.tar.bz2":
"3771ff03a2f5b5c3b53aa0a68b9ad0d5", "1fa4271c393e5998d200e56c102ff46fcfea169aaa2148ad9e9469616fbfdd9b",
"http://festvox.org/cmu_arctic/packed/cmu_us_slp_arctic.tar.bz2": "http://festvox.org/cmu_arctic/packed/cmu_us_slp_arctic.tar.bz2":
"9cbf984a832ea01b5058ba9a96862850", "54345ed55e45c23d419e9a823eef427f1cc93c83a710735ec667d068c916abf1",
"http://festvox.org/cmu_arctic/packed/cmu_us_slt_arctic.tar.bz2": "http://festvox.org/cmu_arctic/packed/cmu_us_slt_arctic.tar.bz2":
"959eecb2cbbc4ac304c6b92269380c81", "7c173297916acf3cc7fcab2713be4c60b27312316765a90934651d367226b4ea",
} }
...@@ -148,7 +148,7 @@ class CMUARCTIC(Dataset): ...@@ -148,7 +148,7 @@ class CMUARCTIC(Dataset):
if not os.path.isdir(self._path): if not os.path.isdir(self._path):
if not os.path.isfile(archive): if not os.path.isfile(archive):
checksum = _CHECKSUMS.get(url, None) checksum = _CHECKSUMS.get(url, None)
download_url(url, root, hash_value=checksum, hash_type="md5") download_url_to_file(url, archive, hash_prefix=checksum)
extract_archive(archive) extract_archive(archive)
self._text = os.path.join(self._path, self._folder_text, self._file_text) self._text = os.path.join(self._path, self._folder_text, self._file_text)
......
...@@ -4,13 +4,13 @@ from pathlib import Path ...@@ -4,13 +4,13 @@ from pathlib import Path
from typing import Iterable, Tuple, Union, List from typing import Iterable, Tuple, Union, List
from torch.utils.data import Dataset from torch.utils.data import Dataset
from torchaudio.datasets.utils import download_url from torch.hub import download_url_to_file
_CHECKSUMS = { _CHECKSUMS = {
"http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b": "http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b":
"825f4ebd9183f2417df9f067a9cabe86", "209a8b4cd265013e96f4658632a9878103b0c5abf62b50d4ef3ae1be226b29e4",
"http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b.symbols": "http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b.symbols":
"385e490aabc71b48e772118e3d02923e", "408ccaae803641c6d7b626b6299949320c2dbca96b2220fd3fb17887b023b027",
} }
_PUNCTUATIONS = set([ _PUNCTUATIONS = set([
"!EXCLAMATION-POINT", "!EXCLAMATION-POINT",
...@@ -144,14 +144,14 @@ class CMUDict(Dataset): ...@@ -144,14 +144,14 @@ class CMUDict(Dataset):
'The dictionary file is not found in the following location. ' 'The dictionary file is not found in the following location. '
f'Set `download=True` to download it. {dict_file}') f'Set `download=True` to download it. {dict_file}')
checksum = _CHECKSUMS.get(url, None) checksum = _CHECKSUMS.get(url, None)
download_url(url, root, hash_value=checksum, hash_type="md5") download_url_to_file(url, dict_file, checksum)
if not os.path.exists(symbol_file): if not os.path.exists(symbol_file):
if not download: if not download:
raise RuntimeError( raise RuntimeError(
'The symbol file is not found in the following location. ' 'The symbol file is not found in the following location. '
f'Set `download=True` to download it. {symbol_file}') f'Set `download=True` to download it. {symbol_file}')
checksum = _CHECKSUMS.get(url_symbols, None) checksum = _CHECKSUMS.get(url_symbols, None)
download_url(url_symbols, root, hash_value=checksum, hash_type="md5") download_url_to_file(url_symbols, symbol_file, checksum)
with open(symbol_file, "r") as text: with open(symbol_file, "r") as text:
self._symbols = [line.strip() for line in text.readlines()] self._symbols = [line.strip() for line in text.readlines()]
......
...@@ -3,17 +3,16 @@ from typing import Dict, Tuple, Union ...@@ -3,17 +3,16 @@ from typing import Dict, Tuple, Union
from torch import Tensor from torch import Tensor
from torch.utils.data import Dataset from torch.utils.data import Dataset
from torch.hub import download_url_to_file
import torchaudio import torchaudio
from torchaudio.datasets.utils import ( from torchaudio.datasets.utils import (
download_url,
extract_archive, extract_archive,
validate_file,
) )
_URL = "https://datashare.ed.ac.uk/bitstream/handle/10283/3038/DR-VCTK.zip" _URL = "https://datashare.ed.ac.uk/bitstream/handle/10283/3038/DR-VCTK.zip"
_CHECKSUM = "29e93debeb0e779986542229a81ff29b" _CHECKSUM = "781f12f4406ed36ed27ae3bce55da47ba176e2d8bae67319e389e07b2c9bd769"
_SUPPORTED_SUBSETS = {"train", "test"} _SUPPORTED_SUBSETS = {"train", "test"}
...@@ -55,20 +54,12 @@ class DR_VCTK(Dataset): ...@@ -55,20 +54,12 @@ class DR_VCTK(Dataset):
if not archive.is_file(): if not archive.is_file():
if not download: if not download:
raise RuntimeError("Dataset not found. Please use `download=True` to download it.") raise RuntimeError("Dataset not found. Please use `download=True` to download it.")
download_url(url, root) download_url_to_file(url, archive, hash_prefix=_CHECKSUM)
self._validate_checksum(archive)
extract_archive(archive, root) extract_archive(archive, root)
self._config = self._load_config(self._config_filepath) self._config = self._load_config(self._config_filepath)
self._filename_list = sorted(self._config) self._filename_list = sorted(self._config)
def _validate_checksum(self, archive):
with open(archive, "rb") as file_obj:
if not validate_file(file_obj, _CHECKSUM, "md5"):
raise RuntimeError(
f"The hash of {str(archive)} does not match. Delete the file manually and retry."
)
def _load_config(self, filepath: str) -> Dict[str, Tuple[str, int]]: def _load_config(self, filepath: str) -> Dict[str, Tuple[str, int]]:
# Skip header # Skip header
skip_rows = 2 if self._subset == "train" else 1 skip_rows = 2 if self._subset == "train" else 1
......
...@@ -5,8 +5,8 @@ from typing import Tuple, Optional, Union ...@@ -5,8 +5,8 @@ from typing import Tuple, Optional, Union
import torchaudio import torchaudio
from torch import Tensor from torch import Tensor
from torch.utils.data import Dataset from torch.utils.data import Dataset
from torch.hub import download_url_to_file
from torchaudio.datasets.utils import ( from torchaudio.datasets.utils import (
download_url,
extract_archive, extract_archive,
) )
...@@ -977,7 +977,7 @@ filtered_valid = [ ...@@ -977,7 +977,7 @@ filtered_valid = [
URL = "http://opihi.cs.uvic.ca/sound/genres.tar.gz" URL = "http://opihi.cs.uvic.ca/sound/genres.tar.gz"
FOLDER_IN_ARCHIVE = "genres" FOLDER_IN_ARCHIVE = "genres"
_CHECKSUMS = { _CHECKSUMS = {
"http://opihi.cs.uvic.ca/sound/genres.tar.gz": "5b3d6dddb579ab49814ab86dba69e7c7" "http://opihi.cs.uvic.ca/sound/genres.tar.gz": "24347e0223d2ba798e0a558c4c172d9d4a19c00bb7963fe055d183dadb4ef2c6"
} }
...@@ -1051,7 +1051,7 @@ class GTZAN(Dataset): ...@@ -1051,7 +1051,7 @@ class GTZAN(Dataset):
if not os.path.isdir(self._path): if not os.path.isdir(self._path):
if not os.path.isfile(archive): if not os.path.isfile(archive):
checksum = _CHECKSUMS.get(url, None) checksum = _CHECKSUMS.get(url, None)
download_url(url, root, hash_value=checksum, hash_type="md5") download_url_to_file(url, archive, hash_prefix=checksum)
extract_archive(archive) extract_archive(archive)
if not os.path.isdir(self._path): if not os.path.isdir(self._path):
......
...@@ -5,8 +5,9 @@ from pathlib import Path ...@@ -5,8 +5,9 @@ from pathlib import Path
import torchaudio import torchaudio
from torch import Tensor from torch import Tensor
from torch.utils.data import Dataset from torch.utils.data import Dataset
from torch.hub import download_url_to_file
from torchaudio.datasets.utils import ( from torchaudio.datasets.utils import (
download_url,
extract_archive, extract_archive,
) )
...@@ -121,7 +122,7 @@ class LIBRISPEECH(Dataset): ...@@ -121,7 +122,7 @@ class LIBRISPEECH(Dataset):
if not os.path.isdir(self._path): if not os.path.isdir(self._path):
if not os.path.isfile(archive): if not os.path.isfile(archive):
checksum = _CHECKSUMS.get(url, None) checksum = _CHECKSUMS.get(url, None)
download_url(url, root, hash_value=checksum) download_url_to_file(url, archive, hash_prefix=checksum)
extract_archive(archive) extract_archive(archive)
self._walker = sorted(str(p.stem) for p in Path(self._path).glob('*/*/*' + self._ext_audio)) self._walker = sorted(str(p.stem) for p in Path(self._path).glob('*/*/*' + self._ext_audio))
......
...@@ -5,21 +5,28 @@ from pathlib import Path ...@@ -5,21 +5,28 @@ from pathlib import Path
import torchaudio import torchaudio
from torch import Tensor from torch import Tensor
from torch.utils.data import Dataset from torch.utils.data import Dataset
from torch.hub import download_url_to_file
from torchaudio.datasets.utils import ( from torchaudio.datasets.utils import (
download_url,
extract_archive, extract_archive,
) )
URL = "train-clean-100" URL = "train-clean-100"
FOLDER_IN_ARCHIVE = "LibriTTS" FOLDER_IN_ARCHIVE = "LibriTTS"
_CHECKSUMS = { _CHECKSUMS = {
"http://www.openslr.org/resources/60/dev-clean.tar.gz": "0c3076c1e5245bb3f0af7d82087ee207", "http://www.openslr.org/resources/60/dev-clean.tar.gz":
"http://www.openslr.org/resources/60/dev-other.tar.gz": "815555d8d75995782ac3ccd7f047213d", "da0864e1bd26debed35da8a869dd5c04dfc27682921936de7cff9c8a254dbe1a",
"http://www.openslr.org/resources/60/test-clean.tar.gz": "7bed3bdb047c4c197f1ad3bc412db59f", "http://www.openslr.org/resources/60/dev-other.tar.gz":
"http://www.openslr.org/resources/60/test-other.tar.gz": "ae3258249472a13b5abef2a816f733e4", "d413eda26f3a152ac7c9cf3658ef85504dfb1b625296e5fa83727f5186cca79c",
"http://www.openslr.org/resources/60/train-clean-100.tar.gz": "4a8c202b78fe1bc0c47916a98f3a2ea8", "http://www.openslr.org/resources/60/test-clean.tar.gz":
"http://www.openslr.org/resources/60/train-clean-360.tar.gz": "a84ef10ddade5fd25df69596a2767b2d", "234ea5b25859102a87024a4b9b86641f5b5aaaf1197335c95090cde04fe9a4f5",
"http://www.openslr.org/resources/60/train-other-500.tar.gz": "7b181dd5ace343a5f38427999684aa6f", "http://www.openslr.org/resources/60/test-other.tar.gz":
"33a5342094f3bba7ccc2e0500b9e72d558f72eb99328ac8debe1d9080402f10d",
"http://www.openslr.org/resources/60/train-clean-100.tar.gz":
"c5608bf1ef74bb621935382b8399c5cdd51cd3ee47cec51f00f885a64c6c7f6b",
"http://www.openslr.org/resources/60/train-clean-360.tar.gz":
"ce7cff44dcac46009d18379f37ef36551123a1dc4e5c8e4eb73ae57260de4886",
"http://www.openslr.org/resources/60/train-other-500.tar.gz":
"e35f7e34deeb2e2bdfe4403d88c8fdd5fbf64865cae41f027a185a6965f0a5df",
} }
...@@ -122,7 +129,7 @@ class LIBRITTS(Dataset): ...@@ -122,7 +129,7 @@ class LIBRITTS(Dataset):
if not os.path.isdir(self._path): if not os.path.isdir(self._path):
if not os.path.isfile(archive): if not os.path.isfile(archive):
checksum = _CHECKSUMS.get(url, None) checksum = _CHECKSUMS.get(url, None)
download_url(url, root, hash_value=checksum) download_url_to_file(url, archive, hash_prefix=checksum)
extract_archive(archive) extract_archive(archive)
self._walker = sorted(str(p.stem) for p in Path(self._path).glob('*/*/*' + self._ext_audio)) self._walker = sorted(str(p.stem) for p in Path(self._path).glob('*/*/*' + self._ext_audio))
......
...@@ -4,9 +4,11 @@ from typing import Tuple, Union ...@@ -4,9 +4,11 @@ from typing import Tuple, Union
from pathlib import Path from pathlib import Path
import torchaudio import torchaudio
from torchaudio.datasets.utils import download_url, extract_archive from torchaudio.datasets.utils import extract_archive
from torch import Tensor from torch import Tensor
from torch.utils.data import Dataset from torch.utils.data import Dataset
from torch.hub import download_url_to_file
_RELEASE_CONFIGS = { _RELEASE_CONFIGS = {
"release1": { "release1": {
...@@ -54,7 +56,7 @@ class LJSPEECH(Dataset): ...@@ -54,7 +56,7 @@ class LJSPEECH(Dataset):
if not os.path.isdir(self._path): if not os.path.isdir(self._path):
if not os.path.isfile(archive): if not os.path.isfile(archive):
checksum = _RELEASE_CONFIGS["release1"]["checksum"] checksum = _RELEASE_CONFIGS["release1"]["checksum"]
download_url(url, root, hash_value=checksum) download_url_to_file(url, archive, hash_prefix=checksum)
extract_archive(archive) extract_archive(archive)
with open(self._metadata_path, "r", newline='') as metadata: with open(self._metadata_path, "r", newline='') as metadata:
......
...@@ -5,8 +5,9 @@ from pathlib import Path ...@@ -5,8 +5,9 @@ from pathlib import Path
import torchaudio import torchaudio
from torch.utils.data import Dataset from torch.utils.data import Dataset
from torch import Tensor from torch import Tensor
from torch.hub import download_url_to_file
from torchaudio.datasets.utils import ( from torchaudio.datasets.utils import (
download_url,
extract_archive, extract_archive,
) )
...@@ -16,9 +17,9 @@ HASH_DIVIDER = "_nohash_" ...@@ -16,9 +17,9 @@ HASH_DIVIDER = "_nohash_"
EXCEPT_FOLDER = "_background_noise_" EXCEPT_FOLDER = "_background_noise_"
_CHECKSUMS = { _CHECKSUMS = {
"https://storage.googleapis.com/download.tensorflow.org/data/speech_commands_v0.01.tar.gz": "https://storage.googleapis.com/download.tensorflow.org/data/speech_commands_v0.01.tar.gz":
"3cd23799cb2bbdec517f1cc028f8d43c", "743935421bb51cccdb6bdd152e04c5c70274e935c82119ad7faeec31780d811d",
"https://storage.googleapis.com/download.tensorflow.org/data/speech_commands_v0.02.tar.gz": "https://storage.googleapis.com/download.tensorflow.org/data/speech_commands_v0.02.tar.gz":
"6b74f3901214cb2c2934e98196829835", "af14739ee7dc311471de98f5f9d2c9191b18aedfe957f4a6ff791c709868ff58",
} }
...@@ -111,7 +112,7 @@ class SPEECHCOMMANDS(Dataset): ...@@ -111,7 +112,7 @@ class SPEECHCOMMANDS(Dataset):
if not os.path.isdir(self._path): if not os.path.isdir(self._path):
if not os.path.isfile(archive): if not os.path.isfile(archive):
checksum = _CHECKSUMS.get(url, None) checksum = _CHECKSUMS.get(url, None)
download_url(url, root, hash_value=checksum, hash_type="md5") download_url_to_file(url, archive, hash_prefix=checksum)
extract_archive(archive, self._path) extract_archive(archive, self._path)
if subset == "validation": if subset == "validation":
......
...@@ -5,8 +5,9 @@ from pathlib import Path ...@@ -5,8 +5,9 @@ from pathlib import Path
import torchaudio import torchaudio
from torch import Tensor from torch import Tensor
from torch.utils.data import Dataset from torch.utils.data import Dataset
from torch.hub import download_url_to_file
from torchaudio.datasets.utils import ( from torchaudio.datasets.utils import (
download_url,
extract_archive, extract_archive,
) )
...@@ -101,7 +102,7 @@ class TEDLIUM(Dataset): ...@@ -101,7 +102,7 @@ class TEDLIUM(Dataset):
if not os.path.isdir(self._path): if not os.path.isdir(self._path):
if not os.path.isfile(archive): if not os.path.isfile(archive):
checksum = _RELEASE_CONFIGS[release]["checksum"] checksum = _RELEASE_CONFIGS[release]["checksum"]
download_url(url, root, hash_value=checksum) download_url_to_file(url, archive, hash_prefix=checksum)
extract_archive(archive) extract_archive(archive)
# Create list for all samples # Create list for all samples
......
...@@ -5,6 +5,7 @@ import tarfile ...@@ -5,6 +5,7 @@ import tarfile
import urllib import urllib
import urllib.request import urllib.request
import zipfile import zipfile
import warnings
from typing import Any, Iterable, List, Optional from typing import Any, Iterable, List, Optional
from torch.utils.model_zoo import tqdm from torch.utils.model_zoo import tqdm
...@@ -71,7 +72,7 @@ def download_url(url: str, ...@@ -71,7 +72,7 @@ def download_url(url: str,
progress_bar (bool, optional): Display a progress bar (Default: ``True``). progress_bar (bool, optional): Display a progress bar (Default: ``True``).
resume (bool, optional): Enable resuming download (Default: ``False``). resume (bool, optional): Enable resuming download (Default: ``False``).
""" """
warnings.warn("download_url is deprecated and will be removed in the v0.12 release.")
req = urllib.request.Request(url, method="HEAD") req = urllib.request.Request(url, method="HEAD")
req_info = urllib.request.urlopen(req).info() req_info = urllib.request.urlopen(req).info()
......
...@@ -3,16 +3,18 @@ from typing import Tuple ...@@ -3,16 +3,18 @@ from typing import Tuple
from torch import Tensor from torch import Tensor
from torch.utils.data import Dataset from torch.utils.data import Dataset
from torch.hub import download_url_to_file
import torchaudio import torchaudio
from torchaudio.datasets.utils import ( from torchaudio.datasets.utils import (
download_url,
extract_archive, extract_archive,
) )
URL = "https://datashare.is.ed.ac.uk/bitstream/handle/10283/3443/VCTK-Corpus-0.92.zip" URL = "https://datashare.is.ed.ac.uk/bitstream/handle/10283/3443/VCTK-Corpus-0.92.zip"
_CHECKSUMS = { _CHECKSUMS = {
"https://datashare.is.ed.ac.uk/bitstream/handle/10283/3443/VCTK-Corpus-0.92.zip": "8a6ba2946b36fcbef0212cad601f4bfa" "https://datashare.is.ed.ac.uk/bitstream/handle/10283/3443/VCTK-Corpus-0.92.zip":
"f96258be9fdc2cbff6559541aae7ea4f59df3fcaf5cf963aae5ca647357e359c"
} }
...@@ -63,7 +65,7 @@ class VCTK_092(Dataset): ...@@ -63,7 +65,7 @@ class VCTK_092(Dataset):
if not os.path.isdir(self._path): if not os.path.isdir(self._path):
if not os.path.isfile(archive): if not os.path.isfile(archive):
checksum = _CHECKSUMS.get(url, None) checksum = _CHECKSUMS.get(url, None)
download_url(url, root, hash_value=checksum, hash_type="md5") download_url_to_file(url, archive, hash_prefix=checksum)
extract_archive(archive, self._path) extract_archive(archive, self._path)
if not os.path.isdir(self._path): if not os.path.isdir(self._path):
......
...@@ -4,10 +4,10 @@ from typing import List, Tuple, Union ...@@ -4,10 +4,10 @@ from typing import List, Tuple, Union
from torch import Tensor from torch import Tensor
from torch.utils.data import Dataset from torch.utils.data import Dataset
from torch.hub import download_url_to_file
import torchaudio import torchaudio
from torchaudio.datasets.utils import ( from torchaudio.datasets.utils import (
download_url,
extract_archive, extract_archive,
) )
...@@ -54,7 +54,7 @@ class YESNO(Dataset): ...@@ -54,7 +54,7 @@ class YESNO(Dataset):
if not os.path.isdir(self._path): if not os.path.isdir(self._path):
if not os.path.isfile(archive): if not os.path.isfile(archive):
checksum = _RELEASE_CONFIGS["release1"]["checksum"] checksum = _RELEASE_CONFIGS["release1"]["checksum"]
download_url(url, root, hash_value=checksum) download_url_to_file(url, archive, hash_prefix=checksum)
extract_archive(archive) extract_archive(archive)
if not os.path.isdir(self._path): if not os.path.isdir(self._path):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment