Commit a61b90c2, authored by Sean Kim and committed by Facebook GitHub Bot
Browse files

Raising RuntimeErrors when datasets missing (#2430)

Summary:
For datasets that take a `download` flag, raises a `RuntimeError` when the dataset path does not exist and downloading was not requested. Unit tested manually.

Edit: changed the path that is checked, as well as the error message that is raised.

Pull Request resolved: https://github.com/pytorch/audio/pull/2430

Reviewed By: carolineechen

Differential Revision: D36815729

Pulled By: skim0514

fbshipit-source-id: f062db7919271665b88ec9754d85cfa83b4f6fa3
parent 6e563839
...@@ -120,7 +120,12 @@ class CMUARCTIC(Dataset): ...@@ -120,7 +120,12 @@ class CMUARCTIC(Dataset):
checksum = _CHECKSUMS.get(url, None) checksum = _CHECKSUMS.get(url, None)
download_url_to_file(url, archive, hash_prefix=checksum) download_url_to_file(url, archive, hash_prefix=checksum)
extract_archive(archive) extract_archive(archive)
else:
if not os.path.exists(self._path):
raise RuntimeError(
f"The path {self._path} doesn't exist. "
"Please check the ``root`` path or set `download=True` to download it"
)
self._text = os.path.join(self._path, self._folder_text, self._file_text) self._text = os.path.join(self._path, self._folder_text, self._file_text)
with open(self._text, "r") as text: with open(self._text, "r") as text:
......
...@@ -122,6 +122,12 @@ class LIBRITTS(Dataset): ...@@ -122,6 +122,12 @@ class LIBRITTS(Dataset):
checksum = _CHECKSUMS.get(url, None) checksum = _CHECKSUMS.get(url, None)
download_url_to_file(url, archive, hash_prefix=checksum) download_url_to_file(url, archive, hash_prefix=checksum)
extract_archive(archive) extract_archive(archive)
else:
if not os.path.exists(self._path):
raise RuntimeError(
f"The path {self._path} doesn't exist. "
"Please check the ``root`` path or set `download=True` to download it"
)
self._walker = sorted(str(p.stem) for p in Path(self._path).glob("*/*/*" + self._ext_audio)) self._walker = sorted(str(p.stem) for p in Path(self._path).glob("*/*/*" + self._ext_audio))
......
...@@ -60,6 +60,12 @@ class LJSPEECH(Dataset): ...@@ -60,6 +60,12 @@ class LJSPEECH(Dataset):
checksum = _RELEASE_CONFIGS["release1"]["checksum"] checksum = _RELEASE_CONFIGS["release1"]["checksum"]
download_url_to_file(url, archive, hash_prefix=checksum) download_url_to_file(url, archive, hash_prefix=checksum)
extract_archive(archive) extract_archive(archive)
else:
if not os.path.exists(self._path):
raise RuntimeError(
f"The path {self._path} doesn't exist. "
"Please check the ``root`` path or set `download=True` to download it"
)
with open(self._metadata_path, "r", newline="") as metadata: with open(self._metadata_path, "r", newline="") as metadata:
flist = csv.reader(metadata, delimiter="|", quoting=csv.QUOTE_NONE) flist = csv.reader(metadata, delimiter="|", quoting=csv.QUOTE_NONE)
......
...@@ -109,6 +109,12 @@ class SPEECHCOMMANDS(Dataset): ...@@ -109,6 +109,12 @@ class SPEECHCOMMANDS(Dataset):
checksum = _CHECKSUMS.get(url, None) checksum = _CHECKSUMS.get(url, None)
download_url_to_file(url, archive, hash_prefix=checksum) download_url_to_file(url, archive, hash_prefix=checksum)
extract_archive(archive, self._path) extract_archive(archive, self._path)
else:
if not os.path.exists(self._path):
raise RuntimeError(
f"The path {self._path} doesn't exist. "
"Please check the ``root`` path or set `download=True` to download it"
)
if subset == "validation": if subset == "validation":
self._walker = _load_list(self._path, "validation_list.txt") self._walker = _load_list(self._path, "validation_list.txt")
......
...@@ -108,6 +108,12 @@ class TEDLIUM(Dataset): ...@@ -108,6 +108,12 @@ class TEDLIUM(Dataset):
checksum = _RELEASE_CONFIGS[release]["checksum"] checksum = _RELEASE_CONFIGS[release]["checksum"]
download_url_to_file(url, archive, hash_prefix=checksum) download_url_to_file(url, archive, hash_prefix=checksum)
extract_archive(archive) extract_archive(archive)
else:
if not os.path.exists(self._path):
raise RuntimeError(
f"The path {self._path} doesn't exist. "
"Please check the ``root`` path or set `download=True` to download it"
)
# Create list for all samples # Create list for all samples
self._filelist = [] self._filelist = []
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment