Add citations for datasets (#2371)

Summary: Pull Request resolved: https://github.com/pytorch/audio/pull/2371 Reviewed By: xiaohui-zhang Differential Revision: D36246167 Pulled By: carolineechen fbshipit-source-id: 23042a1c393711864a18c9815d248c18d1d258b4

Add citations for datasets (#2371)
Summary: Pull Request resolved: https://github.com/pytorch/audio/pull/2371 Reviewed By: xiaohui-zhang Differential Revision: D36246167 Pulled By: carolineechen fbshipit-source-id: 23042a1c393711864a18c9815d248c18d1d258b4
638120ca · Caroline Chen · Facebook GitHub Bot · fe3d5d10 · 638120ca · 638120ca
Commit 638120ca authored May 09, 2022 by Caroline Chen Committed by Facebook GitHub Bot May 09, 2022
16 changed files
--- a/docs/source/datasets.rst
+++ b/docs/source/datasets.rst
@@ -127,3 +127,9 @@ QUESST14
 .. autoclass:: QUESST14
  :members:
  :special-members: __getitem__
+
+
+References
+~~~~~~~~~~
+
+.. footbibliography::
--- a/docs/source/refs.bib
+++ b/docs/source/refs.bib
@@ -274,3 +274,79 @@
  journal={arXiv preprint arXiv:2201.12465},
  year={2022}
 }
+@TECHREPORT{Kominek03cmuarctic,
+  author = {John Kominek and Alan W Black},
+  title = {CMU Arctic Databases for Speech Synthesis},
+  institution = {Language Technologies Institute, Carnegie Mellon University},
+  year = {2003}
+}
+@misc{cosentino2020librimix,
+  title={LibriMix: An Open-Source Dataset for Generalizable Speech Separation},
+  author={Joris Cosentino and Manuel Pariente and Samuele Cornell and Antoine Deleforge and Emmanuel Vincent},
+  year={2020},
+  eprint={2005.11262},
+  archivePrefix={arXiv},
+  primaryClass={eess.AS}
+}
+@article{Zen2019LibriTTSAC,
+  title={LibriTTS: A Corpus Derived from LibriSpeech for Text-to-Speech},
+  author={Heiga Zen and Viet-Trung Dang and Robert A. J. Clark and Yu Zhang and Ron J. Weiss and Ye Jia and Z. Chen and Yonghui Wu},
+  journal={ArXiv},
+  year={2019},
+  volume={abs/1904.02882}
+}
+@article{speechcommandsv2,
+  author = { {Warden}, P.},
+  title = "{Speech Commands: A Dataset for Limited-Vocabulary Speech Recognition}",
+  journal = {ArXiv e-prints},
+  archivePrefix = "arXiv",
+  eprint = {1804.03209},
+  primaryClass = "cs.CL",
+  keywords = {Computer Science - Computation and Language, Computer Science - Human-Computer Interaction},
+  year = 2018,
+  month = apr,
+  url = {https://arxiv.org/abs/1804.03209},
+}
+@inproceedings{rousseau2012tedlium,
+  title={TED-LIUM: an Automatic Speech Recognition dedicated corpus},
+  author={Rousseau, Anthony and Del{\'e}glise, Paul and Est{\`e}ve, Yannick},
+  booktitle={Conference on Language Resources and Evaluation (LREC)},
+  pages={125--129},
+  year={2012}
+}
+@misc{yamagishi2019vctk,
+  author={Yamagishi, Junichi and Veaux, Christophe and MacDonald, Kirsten},
+  title={ {CSTR VCTK Corpus}: English Multi-speaker Corpus for {CSTR} Voice Cloning Toolkit (version 0.92)},
+  publisher={University of Edinburgh. The Centre for Speech Technology Research (CSTR)},
+  year=2019,
+  doi={10.7488/ds/2645},
+}
+@misc{Sarfjoo2018DeviceRV,
+  title={Device Recorded VCTK (Small subset version)},
+  author={Seyyed Saeed Sarfjoo and Junichi Yamagishi},
+  year={2018}
+}
+@misc{tzanetakis_essl_cook_2001,
+  author    = "Tzanetakis, George and Essl, Georg and Cook, Perry",
+  title     = "Automatic Musical Genre Classification Of Audio Signals",
+  url       = "http://ismir2001.ismir.net/pdf/tzanetakis.pdf",
+  publisher = "The International Society for Music Information Retrieval",
+  year      = "2001"
+}
+@article{Mir2015QUESST2014EQ,
+  title={QUESST2014: Evaluating Query-by-Example Speech Search in a zero-resource setting with real-life queries},
+  author={Xavier Anguera Miro and Luis Javier Rodriguez-Fuentes and Andi Buzo and Florian Metze and Igor Szoke and Mikel Pe{\~n}agarikano},
+  journal={2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
+  year={2015},
+  pages={5833--5837}
+}
+@misc{cmudict,
+  title={The Carnegie Mellon pronouncing dictionary},
+  author={Weide, R.L.},
+  year={1998},
+  url={http://www.speech.cs.cmu.edu/cgi-bin/cmudict},
+}
+@misc{YesNo,
+  title="YesNo",
+  url="http://www.openslr.org/1/"
+}
--- a/torchaudio/datasets/cmuarctic.py
+++ b/torchaudio/datasets/cmuarctic.py
@@ -51,7 +51,7 @@ def load_cmuarctic_item(line: str, path: str, folder_audio: str, ext_audio: str)


 class CMUARCTIC(Dataset):
-    """Create a Dataset for CMU_ARCTIC.
+    """Create a Dataset for *CMU ARCTIC* [:footcite:`Kominek03cmuarctic`].

    Args:
        root (str or Path): Path to the directory where the dataset is found or downloaded.

--- a/torchaudio/datasets/cmudict.py
+++ b/torchaudio/datasets/cmudict.py
@@ -104,7 +104,7 @@ def _parse_dictionary(lines: Iterable[str], exclude_punctuations: bool) -> List[


 class CMUDict(Dataset):
-    """Create a Dataset for CMU Pronouncing Dictionary (CMUDict).
+    """Create a Dataset for *CMU Pronouncing Dictionary* [:footcite:`cmudict`] (CMUDict).

    Args:
        root (str or Path): Path to the directory where the dataset is found or downloaded.

--- a/torchaudio/datasets/commonvoice.py
+++ b/torchaudio/datasets/commonvoice.py
@@ -27,7 +27,7 @@ def load_commonvoice_item(


 class COMMONVOICE(Dataset):
-    """Create a Dataset for CommonVoice.
+    """Create a Dataset for *CommonVoice* [:footcite:`ardila2020common`].

    Args:
        root (str or Path): Path to the directory where the dataset is located.

--- a/torchaudio/datasets/dr_vctk.py
+++ b/torchaudio/datasets/dr_vctk.py
@@ -16,7 +16,7 @@ _SUPPORTED_SUBSETS = {"train", "test"}


 class DR_VCTK(Dataset):
-    """Create a dataset for Device Recorded VCTK (Small subset version).
+    """Create a dataset for *Device Recorded VCTK (Small subset version)* [:footcite:`Sarfjoo2018DeviceRV`].

    Args:
        root (str or Path): Root directory where the dataset's top level directory is found.

--- a/torchaudio/datasets/gtzan.py
+++ b/torchaudio/datasets/gtzan.py
@@ -998,7 +998,7 @@ def load_gtzan_item(fileid: str, path: str, ext_audio: str) -> Tuple[Tensor, str


 class GTZAN(Dataset):
-    """Create a Dataset for GTZAN.
+    """Create a Dataset for *GTZAN* [:footcite:`tzanetakis_essl_cook_2001`].

    Note:
        Please see http://marsyas.info/downloads/datasets.html if you are planning to use

--- a/torchaudio/datasets/librimix.py
+++ b/torchaudio/datasets/librimix.py
@@ -9,7 +9,7 @@ SampleType = Tuple[int, torch.Tensor, List[torch.Tensor]]


 class LibriMix(Dataset):
-    r"""Create the LibriMix dataset.
+    r"""Create the *LibriMix* [:footcite:`cosentino2020librimix`] dataset.

    Args:
        root (str or Path): The path to the directory where the directory ``Libri2Mix`` or

--- a/torchaudio/datasets/librispeech.py
+++ b/torchaudio/datasets/librispeech.py
@@ -59,7 +59,7 @@ def load_librispeech_item(


 class LIBRISPEECH(Dataset):
-    """Create a Dataset for LibriSpeech.
+    """Create a Dataset for *LibriSpeech* [:footcite:`7178964`].

    Args:
        root (str or Path): Path to the directory where the dataset is found or downloaded.

--- a/torchaudio/datasets/libritts.py
+++ b/torchaudio/datasets/libritts.py
@@ -65,7 +65,7 @@ def load_libritts_item(


 class LIBRITTS(Dataset):
-    """Create a Dataset for LibriTTS.
+    """Create a Dataset for *LibriTTS* [:footcite:`Zen2019LibriTTSAC`].

    Args:
        root (str or Path): Path to the directory where the dataset is found or downloaded.

--- a/torchaudio/datasets/ljspeech.py
+++ b/torchaudio/datasets/ljspeech.py
@@ -20,7 +20,7 @@ _RELEASE_CONFIGS = {


 class LJSPEECH(Dataset):
-    """Create a Dataset for LJSpeech-1.1.
+    """Create a Dataset for *LJSpeech-1.1* [:footcite:`ljspeech17`].

    Args:
        root (str or Path): Path to the directory where the dataset is found or downloaded.

--- a/torchaudio/datasets/quesst14.py
+++ b/torchaudio/datasets/quesst14.py
@@ -23,7 +23,7 @@ _LANGUAGES = [


 class QUESST14(Dataset):
-    """Create QUESST14 Dataset
+    """Create *QUESST14* [:footcite:`Mir2015QUESST2014EQ`] Dataset

    Args:
        root (str or Path): Root directory where the dataset's top level directory is found

--- a/torchaudio/datasets/speechcommands.py
+++ b/torchaudio/datasets/speechcommands.py
@@ -51,7 +51,7 @@ def load_speechcommands_item(filepath: str, path: str) -> Tuple[Tensor, int, str


 class SPEECHCOMMANDS(Dataset):
-    """Create a Dataset for Speech Commands.
+    """Create a Dataset for *Speech Commands* [:footcite:`speechcommandsv2`].

    Args:
        root (str or Path): Path to the directory where the dataset is found or downloaded.

--- a/torchaudio/datasets/tedlium.py
+++ b/torchaudio/datasets/tedlium.py
@@ -44,7 +44,7 @@ _RELEASE_CONFIGS = {

 class TEDLIUM(Dataset):
    """
-    Create a Dataset for Tedlium. It supports releases 1,2 and 3.
+    Create a Dataset for *Tedlium* [:footcite:`rousseau2012tedlium`]. It supports releases 1, 2 and 3.

    Args:
        root (str or Path): Path to the directory where the dataset is found or downloaded.

--- a/torchaudio/datasets/vctk.py
+++ b/torchaudio/datasets/vctk.py
@@ -19,7 +19,7 @@ SampleType = Tuple[Tensor, int, str, str, str]


 class VCTK_092(Dataset):
-    """Create VCTK 0.92 Dataset
+    """Create *VCTK 0.92* [:footcite:`yamagishi2019vctk`] Dataset

    Args:
        root (str): Root directory where the dataset's top level directory is found.

--- a/torchaudio/datasets/yesno.py
+++ b/torchaudio/datasets/yesno.py
@@ -21,7 +21,7 @@ _RELEASE_CONFIGS = {


 class YESNO(Dataset):
-    """Create a Dataset for YesNo.
+    """Create a Dataset for *YesNo* [:footcite:`YesNo`].

    Args:
        root (str or Path): Path to the directory where the dataset is found or downloaded.