Commit 638120ca authored by Caroline Chen, committed by Facebook GitHub Bot
Browse files

Add citations for datasets (#2371)

Summary: Pull Request resolved: https://github.com/pytorch/audio/pull/2371

Reviewed By: xiaohui-zhang

Differential Revision: D36246167

Pulled By: carolineechen

fbshipit-source-id: 23042a1c393711864a18c9815d248c18d1d258b4
parent fe3d5d10
...@@ -127,3 +127,9 @@ QUESST14 ...@@ -127,3 +127,9 @@ QUESST14
.. autoclass:: QUESST14 .. autoclass:: QUESST14
:members: :members:
:special-members: __getitem__ :special-members: __getitem__
References
~~~~~~~~~~
.. footbibliography::
...@@ -274,3 +274,79 @@ ...@@ -274,3 +274,79 @@
journal={arXiv preprint arXiv:2201.12465}, journal={arXiv preprint arXiv:2201.12465},
year={2022} year={2022}
} }
@TECHREPORT{Kominek03cmuarctic,
author = {John Kominek and Alan W Black},
title = {CMU Arctic Databases for Speech Synthesis},
institution = {Language Technologies Institute, Carnegie Mellon University},
year = {2003}
}
@misc{cosentino2020librimix,
title={LibriMix: An Open-Source Dataset for Generalizable Speech Separation},
author={Joris Cosentino and Manuel Pariente and Samuele Cornell and Antoine Deleforge and Emmanuel Vincent},
year={2020},
eprint={2005.11262},
archivePrefix={arXiv},
primaryClass={eess.AS}
}
@article{Zen2019LibriTTSAC,
title={LibriTTS: A Corpus Derived from LibriSpeech for Text-to-Speech},
author={Heiga Zen and Viet-Trung Dang and Robert A. J. Clark and Yu Zhang and Ron J. Weiss and Ye Jia and Z. Chen and Yonghui Wu},
journal={ArXiv},
year={2019},
volume={abs/1904.02882}
}
@article{speechcommandsv2,
author = { {Warden}, P.},
title = "{Speech Commands: A Dataset for Limited-Vocabulary Speech Recognition}",
journal = {ArXiv e-prints},
archivePrefix = "arXiv",
eprint = {1804.03209},
primaryClass = "cs.CL",
keywords = {Computer Science - Computation and Language, Computer Science - Human-Computer Interaction},
year = 2018,
month = apr,
url = {https://arxiv.org/abs/1804.03209},
}
@inproceedings{rousseau2012tedlium,
title={TED-LIUM: an Automatic Speech Recognition dedicated corpus},
author={Rousseau, Anthony and Del{\'e}glise, Paul and Est{\`e}ve, Yannick},
booktitle={Conference on Language Resources and Evaluation (LREC)},
pages={125--129},
year={2012}
}
@misc{yamagishi2019vctk,
author={Yamagishi, Junichi and Veaux, Christophe and MacDonald, Kirsten},
title={ {CSTR VCTK Corpus}: English Multi-speaker Corpus for {CSTR} Voice Cloning Toolkit (version 0.92)},
publisher={University of Edinburgh. The Centre for Speech Technology Research (CSTR)},
year=2019,
doi={10.7488/ds/2645},
}
@misc{Sarfjoo2018DeviceRV,
title={Device Recorded VCTK (Small subset version)},
author={Seyyed Saeed Sarfjoo and Junichi Yamagishi},
year={2018}
}
@misc{tzanetakis_essl_cook_2001,
author = "Tzanetakis, George and Essl, Georg and Cook, Perry",
title = "Automatic Musical Genre Classification Of Audio Signals",
url = "http://ismir2001.ismir.net/pdf/tzanetakis.pdf",
publisher = "The International Society for Music Information Retrieval",
year = "2001"
}
@article{Mir2015QUESST2014EQ,
title={QUESST2014: Evaluating Query-by-Example Speech Search in a zero-resource setting with real-life queries},
author={Xavier Anguera Miro and Luis Javier Rodriguez-Fuentes and Andi Buzo and Florian Metze and Igor Szoke and Mikel Pe{\~n}agarikano},
journal={2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
year={2015},
pages={5833--5837}
}
@misc{cmudict,
title={The Carnegie Mellon pronouncing dictionary},
author={Weide, R.L.},
year={1998},
url={http://www.speech.cs.cmu.edu/cgi-bin/cmudict},
}
@misc{YesNo,
title="YesNo",
url="http://www.openslr.org/1/"
}
...@@ -51,7 +51,7 @@ def load_cmuarctic_item(line: str, path: str, folder_audio: str, ext_audio: str) ...@@ -51,7 +51,7 @@ def load_cmuarctic_item(line: str, path: str, folder_audio: str, ext_audio: str)
class CMUARCTIC(Dataset): class CMUARCTIC(Dataset):
"""Create a Dataset for CMU_ARCTIC. """Create a Dataset for *CMU ARCTIC* [:footcite:`Kominek03cmuarctic`].
Args: Args:
root (str or Path): Path to the directory where the dataset is found or downloaded. root (str or Path): Path to the directory where the dataset is found or downloaded.
......
...@@ -104,7 +104,7 @@ def _parse_dictionary(lines: Iterable[str], exclude_punctuations: bool) -> List[ ...@@ -104,7 +104,7 @@ def _parse_dictionary(lines: Iterable[str], exclude_punctuations: bool) -> List[
class CMUDict(Dataset): class CMUDict(Dataset):
"""Create a Dataset for CMU Pronouncing Dictionary (CMUDict). """Create a Dataset for *CMU Pronouncing Dictionary* [:footcite:`cmudict`] (CMUDict).
Args: Args:
root (str or Path): Path to the directory where the dataset is found or downloaded. root (str or Path): Path to the directory where the dataset is found or downloaded.
......
...@@ -27,7 +27,7 @@ def load_commonvoice_item( ...@@ -27,7 +27,7 @@ def load_commonvoice_item(
class COMMONVOICE(Dataset): class COMMONVOICE(Dataset):
"""Create a Dataset for CommonVoice. """Create a Dataset for *CommonVoice* [:footcite:`ardila2020common`].
Args: Args:
root (str or Path): Path to the directory where the dataset is located. root (str or Path): Path to the directory where the dataset is located.
......
...@@ -16,7 +16,7 @@ _SUPPORTED_SUBSETS = {"train", "test"} ...@@ -16,7 +16,7 @@ _SUPPORTED_SUBSETS = {"train", "test"}
class DR_VCTK(Dataset): class DR_VCTK(Dataset):
"""Create a dataset for Device Recorded VCTK (Small subset version). """Create a dataset for *Device Recorded VCTK (Small subset version)* [:footcite:`Sarfjoo2018DeviceRV`].
Args: Args:
root (str or Path): Root directory where the dataset's top level directory is found. root (str or Path): Root directory where the dataset's top level directory is found.
......
...@@ -998,7 +998,7 @@ def load_gtzan_item(fileid: str, path: str, ext_audio: str) -> Tuple[Tensor, str ...@@ -998,7 +998,7 @@ def load_gtzan_item(fileid: str, path: str, ext_audio: str) -> Tuple[Tensor, str
class GTZAN(Dataset): class GTZAN(Dataset):
"""Create a Dataset for GTZAN. """Create a Dataset for *GTZAN* [:footcite:`tzanetakis_essl_cook_2001`].
Note: Note:
Please see http://marsyas.info/downloads/datasets.html if you are planning to use Please see http://marsyas.info/downloads/datasets.html if you are planning to use
......
...@@ -9,7 +9,7 @@ SampleType = Tuple[int, torch.Tensor, List[torch.Tensor]] ...@@ -9,7 +9,7 @@ SampleType = Tuple[int, torch.Tensor, List[torch.Tensor]]
class LibriMix(Dataset): class LibriMix(Dataset):
r"""Create the LibriMix dataset. r"""Create the *LibriMix* [:footcite:`cosentino2020librimix`] dataset.
Args: Args:
root (str or Path): The path to the directory where the directory ``Libri2Mix`` or root (str or Path): The path to the directory where the directory ``Libri2Mix`` or
......
...@@ -59,7 +59,7 @@ def load_librispeech_item( ...@@ -59,7 +59,7 @@ def load_librispeech_item(
class LIBRISPEECH(Dataset): class LIBRISPEECH(Dataset):
"""Create a Dataset for LibriSpeech. """Create a Dataset for *LibriSpeech* [:footcite:`7178964`].
Args: Args:
root (str or Path): Path to the directory where the dataset is found or downloaded. root (str or Path): Path to the directory where the dataset is found or downloaded.
......
...@@ -65,7 +65,7 @@ def load_libritts_item( ...@@ -65,7 +65,7 @@ def load_libritts_item(
class LIBRITTS(Dataset): class LIBRITTS(Dataset):
"""Create a Dataset for LibriTTS. """Create a Dataset for *LibriTTS* [:footcite:`Zen2019LibriTTSAC`].
Args: Args:
root (str or Path): Path to the directory where the dataset is found or downloaded. root (str or Path): Path to the directory where the dataset is found or downloaded.
......
...@@ -20,7 +20,7 @@ _RELEASE_CONFIGS = { ...@@ -20,7 +20,7 @@ _RELEASE_CONFIGS = {
class LJSPEECH(Dataset): class LJSPEECH(Dataset):
"""Create a Dataset for LJSpeech-1.1. """Create a Dataset for *LJSpeech-1.1* [:footcite:`ljspeech17`].
Args: Args:
root (str or Path): Path to the directory where the dataset is found or downloaded. root (str or Path): Path to the directory where the dataset is found or downloaded.
......
...@@ -23,7 +23,7 @@ _LANGUAGES = [ ...@@ -23,7 +23,7 @@ _LANGUAGES = [
class QUESST14(Dataset): class QUESST14(Dataset):
"""Create QUESST14 Dataset """Create *QUESST14* [:footcite:`Mir2015QUESST2014EQ`] Dataset
Args: Args:
root (str or Path): Root directory where the dataset's top level directory is found root (str or Path): Root directory where the dataset's top level directory is found
......
...@@ -51,7 +51,7 @@ def load_speechcommands_item(filepath: str, path: str) -> Tuple[Tensor, int, str ...@@ -51,7 +51,7 @@ def load_speechcommands_item(filepath: str, path: str) -> Tuple[Tensor, int, str
class SPEECHCOMMANDS(Dataset): class SPEECHCOMMANDS(Dataset):
"""Create a Dataset for Speech Commands. """Create a Dataset for *Speech Commands* [:footcite:`speechcommandsv2`].
Args: Args:
root (str or Path): Path to the directory where the dataset is found or downloaded. root (str or Path): Path to the directory where the dataset is found or downloaded.
......
...@@ -44,7 +44,7 @@ _RELEASE_CONFIGS = { ...@@ -44,7 +44,7 @@ _RELEASE_CONFIGS = {
class TEDLIUM(Dataset): class TEDLIUM(Dataset):
""" """
Create a Dataset for Tedlium. It supports releases 1, 2 and 3. Create a Dataset for *Tedlium* [:footcite:`rousseau2012tedlium`]. It supports releases 1, 2 and 3.
Args: Args:
root (str or Path): Path to the directory where the dataset is found or downloaded. root (str or Path): Path to the directory where the dataset is found or downloaded.
......
...@@ -19,7 +19,7 @@ SampleType = Tuple[Tensor, int, str, str, str] ...@@ -19,7 +19,7 @@ SampleType = Tuple[Tensor, int, str, str, str]
class VCTK_092(Dataset): class VCTK_092(Dataset):
"""Create VCTK 0.92 Dataset """Create *VCTK 0.92* [:footcite:`yamagishi2019vctk`] Dataset
Args: Args:
root (str): Root directory where the dataset's top level directory is found. root (str): Root directory where the dataset's top level directory is found.
......
...@@ -21,7 +21,7 @@ _RELEASE_CONFIGS = { ...@@ -21,7 +21,7 @@ _RELEASE_CONFIGS = {
class YESNO(Dataset): class YESNO(Dataset):
"""Create a Dataset for YesNo. """Create a Dataset for *YesNo* [:footcite:`YesNo`].
Args: Args:
root (str or Path): Path to the directory where the dataset is found or downloaded. root (str or Path): Path to the directory where the dataset is found or downloaded.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment