OpenDAS / Torchaudio / Commits

Commit e3d1d746 (unverified)
Authored Oct 02, 2020 by moto, committed via GitHub Oct 02, 2020

Update docstrings/documentations of all the datasets (#931)

parent 963224f5
Showing 11 changed files with 257 additions and 102 deletions (+257 -102)
docs/source/datasets.rst               +23  -20
torchaudio/datasets/cmuarctic.py       +22   -3
torchaudio/datasets/commonvoice.py     +32   -5
torchaudio/datasets/gtzan.py           +25   -7
torchaudio/datasets/librispeech.py     +21   -3
torchaudio/datasets/libritts.py        +22   -3
torchaudio/datasets/ljspeech.py        +18   -3
torchaudio/datasets/speechcommands.py  +20   -3
torchaudio/datasets/tedlium.py         +15  -41
torchaudio/datasets/vctk.py            +39  -11
torchaudio/datasets/yesno.py           +20   -3
docs/source/datasets.rst
...
@@ -29,82 +29,85 @@ CMUARCTIC
 ~~~~~~~~~

 .. autoclass:: CMUARCTIC
-  :members: __getitem__
-  :special-members:
+  :members:
+  :special-members: __getitem__

 COMMONVOICE
 ~~~~~~~~~~~

 .. autoclass:: COMMONVOICE
-  :members: __getitem__
-  :special-members:
+  :members:
+  :special-members: __getitem__

 GTZAN
 ~~~~~

 .. autoclass:: GTZAN
-  :members: __getitem__
-  :special-members:
+  :members:
+  :special-members: __getitem__

 LIBRISPEECH
 ~~~~~~~~~~~

 .. autoclass:: LIBRISPEECH
-  :members: __getitem__
-  :special-members:
+  :members:
+  :special-members: __getitem__

 LIBRITTS
 ~~~~~~~~

 .. autoclass:: LIBRITTS
-  :members: __getitem__
-  :special-members:
+  :members:
+  :special-members: __getitem__

 LJSPEECH
 ~~~~~~~~

 .. autoclass:: LJSPEECH
-  :members: __getitem__
-  :special-members:
+  :members:
+  :special-members: __getitem__

 SPEECHCOMMANDS
 ~~~~~~~~~~~~~~

 .. autoclass:: SPEECHCOMMANDS
-  :members: __getitem__
-  :special-members:
+  :members:
+  :special-members: __getitem__

 TEDLIUM
 ~~~~~~~~~~~~~~

 .. autoclass:: TEDLIUM
-  :members: __getitem__
-  :special-members: get_phoneme_dict
+  :members:
+  :special-members: __getitem__

 VCTK
 ~~~~

 .. autoclass:: VCTK
-  :members: __getitem__
-  :special-members:
+  :members:
+  :special-members: __getitem__

 VCTK_092
 ~~~~~~~~

 .. autoclass:: VCTK_092
   :members:
   :special-members: __getitem__

 YESNO
 ~~~~~

 .. autoclass:: YESNO
-  :members: __getitem__
-  :special-members:
+  :members:
+  :special-members: __getitem__
torchaudio/datasets/cmuarctic.py
...
@@ -76,9 +76,20 @@ def load_cmuarctic_item(line: str,
 class CMUARCTIC(Dataset):
-    """
-    Create a Dataset for CMU_arctic. Each item is a tuple of the form:
-    waveform, sample_rate, utterance, utterance_id
+    """Create a Dataset for CMU_ARCTIC.
+
+    Args:
+        root (str): Path to the directory where the dataset is found or downloaded.
+        url (str, optional):
+            The URL to download the dataset from or the type of the dataset to download.
+            (default: ``"aew"``)
+            Allowed type values are ``"aew"``, ``"ahw"``, ``"aup"``, ``"awb"``, ``"axb"``, ``"bdl"``,
+            ``"clb"``, ``"eey"``, ``"fem"``, ``"gka"``, ``"jmk"``, ``"ksp"``, ``"ljm"``, ``"lnh"``,
+            ``"rms"``, ``"rxr"``, ``"slp"`` or ``"slt"``.
+        folder_in_archive (str, optional):
+            The top-level directory of the dataset. (default: ``"ARCTIC"``)
+        download (bool, optional):
+            Whether to download the dataset if it is not found at root path. (default: ``False``).
     """

     _file_text = "txt.done.data"
...
@@ -143,6 +154,14 @@ class CMUARCTIC(Dataset):
         self._walker = list(walker)

     def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str]:
+        """Load the n-th sample from the dataset.
+
+        Args:
+            n (int): The index of the sample to be loaded
+
+        Returns:
+            tuple: ``(waveform, sample_rate, utterance, utterance_id)``
+        """
         line = self._walker[n]
         return load_cmuarctic_item(line, self._path, self._folder_audio, self._ext_audio)
...
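To make the documented return shape concrete, a minimal usage sketch follows; the local ``./data`` root and the ``download=True`` flag are illustrative, not part of the commit:

    import torchaudio

    # Downloads the "aew" speaker recordings into ./data if they are not already there.
    dataset = torchaudio.datasets.CMUARCTIC("./data", url="aew", download=True)
    waveform, sample_rate, utterance, utterance_id = dataset[0]
    print(waveform.shape, sample_rate, utterance_id)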
torchaudio/datasets/commonvoice.py
...
@@ -100,11 +100,28 @@ def load_commonvoice_item(line: List[str],
 class COMMONVOICE(Dataset):
-    """
-    Create a Dataset for CommonVoice. Each item is a tuple of the form:
-    (waveform, sample_rate, dictionary)
-    where dictionary is a dictionary built from the tsv file with the following keys:
-    client_id, path, sentence, up_votes, down_votes, age, gender, accent.
+    """Create a Dataset for CommonVoice.
+
+    Args:
+        root (str): Path to the directory where the dataset is found or downloaded.
+        tsv (str, optional): The name of the tsv file used to construct the metadata.
+            (default: ``"train.tsv"``)
+        url (str, optional): The URL to download the dataset from, or the language of
+            the dataset to download. (default: ``"english"``).
+            Allowed language values are ``"tatar"``, ``"english"``, ``"german"``,
+            ``"french"``, ``"welsh"``, ``"breton"``, ``"chuvash"``, ``"turkish"``, ``"kyrgyz"``,
+            ``"irish"``, ``"kabyle"``, ``"catalan"``, ``"taiwanese"``, ``"slovenian"``,
+            ``"italian"``, ``"dutch"``, ``"hakha chin"``, ``"esperanto"``, ``"estonian"``,
+            ``"persian"``, ``"portuguese"``, ``"basque"``, ``"spanish"``, ``"chinese"``,
+            ``"mongolian"``, ``"sakha"``, ``"dhivehi"``, ``"kinyarwanda"``, ``"swedish"``,
+            ``"russian"``, ``"indonesian"``, ``"arabic"``, ``"tamil"``, ``"interlingua"``,
+            ``"latvian"``, ``"japanese"``, ``"votic"``, ``"abkhaz"``, ``"cantonese"`` and
+            ``"romansh sursilvan"``.
+        folder_in_archive (str, optional): The top-level directory of the dataset.
+        version (str): Version string. (default: ``"cv-corpus-4-2019-12-10"``)
+            For the other allowed values, please check out https://commonvoice.mozilla.org/en/datasets.
+        download (bool, optional):
+            Whether to download the dataset if it is not found at root path. (default: ``False``).
     """

     _ext_txt = ".txt"
...
@@ -192,6 +209,16 @@ class COMMONVOICE(Dataset):
         self._walker = list(walker)

     def __getitem__(self, n: int) -> Tuple[Tensor, int, Dict[str, str]]:
+        """Load the n-th sample from the dataset.
+
+        Args:
+            n (int): The index of the sample to be loaded
+
+        Returns:
+            tuple: ``(waveform, sample_rate, dictionary)``, where dictionary is built
+            from the TSV file with the following keys: ``client_id``, ``path``, ``sentence``,
+            ``up_votes``, ``down_votes``, ``age``, ``gender`` and ``accent``.
+        """
         line = self._walker[n]
         return load_commonvoice_item(line, self._header, self._path, self._folder_audio)
...
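A sketch of reading the per-item metadata dictionary documented above, assuming the English corpus has already been downloaded and extracted under ``./data`` (the path is illustrative):

    import torchaudio

    # tsv="train.tsv" selects which metadata file the per-item dictionary is built from.
    dataset = torchaudio.datasets.COMMONVOICE("./data", tsv="train.tsv", url="english")
    waveform, sample_rate, metadata = dataset[0]
    print(metadata["client_id"], metadata["sentence"])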
torchaudio/datasets/gtzan.py
View file @
e3d1d746
import
os
import
os
import
warnings
import
warnings
from
typing
import
Any
,
Tuple
from
typing
import
Any
,
Tuple
,
Optional
import
torchaudio
import
torchaudio
from
torch
import
Tensor
from
torch
import
Tensor
...
@@ -998,12 +998,22 @@ def load_gtzan_item(fileid: str, path: str, ext_audio: str) -> Tuple[Tensor, str
...
@@ -998,12 +998,22 @@ def load_gtzan_item(fileid: str, path: str, ext_audio: str) -> Tuple[Tensor, str
class
GTZAN
(
Dataset
):
class
GTZAN
(
Dataset
):
"""
"""Create a Dataset for GTZAN.
Create a Dataset for GTZAN. Each item is a tuple of the form:
waveform, sample_rate, label.
Note:
Please see http://marsyas.info/downloads/datasets.html if you are planning to use
this dataset to publish results.
Please see http://marsyas.info/downloads/datasets.html
Args:
if you are planning to use this dataset to publish results.
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional): The URL to download the dataset from.
(default: ``"http://opihi.cs.uvic.ca/sound/genres.tar.gz"``)
folder_in_archive (str, optional): The top-level directory of the dataset.
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
subset (str, optional): Which subset of the dataset to use.
One of ``"training"``, ``"validation"``, ``"testing"`` or ``None``.
If ``None``, the entire dataset is used. (default: ``None``).
"""
"""
_ext_audio
=
".wav"
_ext_audio
=
".wav"
...
@@ -1014,7 +1024,7 @@ class GTZAN(Dataset):
...
@@ -1014,7 +1024,7 @@ class GTZAN(Dataset):
url
:
str
=
URL
,
url
:
str
=
URL
,
folder_in_archive
:
str
=
FOLDER_IN_ARCHIVE
,
folder_in_archive
:
str
=
FOLDER_IN_ARCHIVE
,
download
:
bool
=
False
,
download
:
bool
=
False
,
subset
:
Any
=
None
,
subset
:
Optional
[
str
]
=
None
,
)
->
None
:
)
->
None
:
# super(GTZAN, self).__init__()
# super(GTZAN, self).__init__()
...
@@ -1082,6 +1092,14 @@ class GTZAN(Dataset):
...
@@ -1082,6 +1092,14 @@ class GTZAN(Dataset):
self
.
_walker
=
filtered_test
self
.
_walker
=
filtered_test
def
__getitem__
(
self
,
n
:
int
)
->
Tuple
[
Tensor
,
int
,
str
]:
def
__getitem__
(
self
,
n
:
int
)
->
Tuple
[
Tensor
,
int
,
str
]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, label)``
"""
fileid
=
self
.
_walker
[
n
]
fileid
=
self
.
_walker
[
n
]
item
=
load_gtzan_item
(
fileid
,
self
.
_path
,
self
.
_ext_audio
)
item
=
load_gtzan_item
(
fileid
,
self
.
_path
,
self
.
_ext_audio
)
waveform
,
sample_rate
,
label
=
item
waveform
,
sample_rate
,
label
=
item
...
...
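The ``subset`` argument, now typed as ``Optional[str]``, selects the split; a minimal sketch assuming the archive is (or will be) under ``./data``, both of which are illustrative choices:

    import torchaudio

    # subset=None uses the whole dataset; "training"/"validation"/"testing" pick a split.
    dataset = torchaudio.datasets.GTZAN("./data", download=True, subset="training")
    waveform, sample_rate, label = dataset[0]
    print(label, waveform.shape, sample_rate)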
torchaudio/datasets/librispeech.py
...
@@ -67,9 +67,19 @@ def load_librispeech_item(fileid: str,
 class LIBRISPEECH(Dataset):
-    """
-    Create a Dataset for LibriSpeech. Each item is a tuple of the form:
-    waveform, sample_rate, utterance, speaker_id, chapter_id, utterance_id
+    """Create a Dataset for LibriSpeech.
+
+    Args:
+        root (str): Path to the directory where the dataset is found or downloaded.
+        url (str, optional): The URL to download the dataset from,
+            or the type of the dataset to download.
+            Allowed type values are ``"dev-clean"``, ``"dev-other"``, ``"test-clean"``,
+            ``"test-other"``, ``"train-clean-100"``, ``"train-clean-360"`` and
+            ``"train-other-500"``. (default: ``"train-clean-100"``)
+        folder_in_archive (str, optional):
+            The top-level directory of the dataset. (default: ``"LibriSpeech"``)
+        download (bool, optional):
+            Whether to download the dataset if it is not found at root path. (default: ``False``).
     """

     _ext_txt = ".trans.txt"
...
@@ -117,6 +127,14 @@ class LIBRISPEECH(Dataset):
         self._walker = list(walker)

     def __getitem__(self, n: int) -> Tuple[Tensor, int, str, int, int, int]:
+        """Load the n-th sample from the dataset.
+
+        Args:
+            n (int): The index of the sample to be loaded
+
+        Returns:
+            tuple: ``(waveform, sample_rate, utterance, speaker_id, chapter_id, utterance_id)``
+        """
         fileid = self._walker[n]
         return load_librispeech_item(fileid, self._path, self._ext_audio, self._ext_txt)
...
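A usage sketch matching the documented six-element tuple; the root path and the choice of split are illustrative:

    import torchaudio

    # "test-clean" is the smallest evaluation split, convenient for a quick check.
    dataset = torchaudio.datasets.LIBRISPEECH("./data", url="test-clean", download=True)
    waveform, sample_rate, utterance, speaker_id, chapter_id, utterance_id = dataset[0]
    print(speaker_id, chapter_id, utterance_id, utterance[:40])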
torchaudio/datasets/libritts.py
...
@@ -65,9 +65,19 @@ def load_libritts_item(
 class LIBRITTS(Dataset):
-    """
-    Create a Dataset for LibriTTS. Each item is a tuple of the form:
-    waveform, sample_rate, original_text, normalized_text, speaker_id, chapter_id, utterance_id
+    """Create a Dataset for LibriTTS.
+
+    Args:
+        root (str): Path to the directory where the dataset is found or downloaded.
+        url (str, optional): The URL to download the dataset from,
+            or the type of the dataset to download.
+            Allowed type values are ``"dev-clean"``, ``"dev-other"``, ``"test-clean"``,
+            ``"test-other"``, ``"train-clean-100"``, ``"train-clean-360"`` and
+            ``"train-other-500"``. (default: ``"train-clean-100"``)
+        folder_in_archive (str, optional):
+            The top-level directory of the dataset. (default: ``"LibriTTS"``)
+        download (bool, optional):
+            Whether to download the dataset if it is not found at root path. (default: ``False``).
     """

     _ext_original_txt = ".original.txt"
...
@@ -118,6 +128,15 @@ class LIBRITTS(Dataset):
         self._walker = list(walker)

     def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, int, int, str]:
+        """Load the n-th sample from the dataset.
+
+        Args:
+            n (int): The index of the sample to be loaded
+
+        Returns:
+            tuple: ``(waveform, sample_rate, original_text, normalized_text, speaker_id,
+            chapter_id, utterance_id)``
+        """
         fileid = self._walker[n]
         return load_libritts_item(
             fileid,
...
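The same pattern for LibriTTS, whose items additionally carry the original and normalized text (root path and split are illustrative):

    import torchaudio

    dataset = torchaudio.datasets.LIBRITTS("./data", url="dev-clean", download=True)
    (waveform, sample_rate, original_text, normalized_text,
     speaker_id, chapter_id, utterance_id) = dataset[0]
    print(original_text)
    print(normalized_text)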
torchaudio/datasets/ljspeech.py
...
@@ -33,9 +33,16 @@ def load_ljspeech_item(line: List[str], path: str, ext_audio: str) -> Tuple[Tens
 class LJSPEECH(Dataset):
-    """
-    Create a Dataset for LJSpeech-1.1. Each item is a tuple of the form:
-    waveform, sample_rate, transcript, normalized_transcript
+    """Create a Dataset for LJSpeech-1.1.
+
+    Args:
+        root (str): Path to the directory where the dataset is found or downloaded.
+        url (str, optional): The URL to download the dataset from.
+            (default: ``"https://data.keithito.com/data/speech/LJSpeech-1.1.tar.bz2"``)
+        folder_in_archive (str, optional):
+            The top-level directory of the dataset. (default: ``"wavs"``)
+        download (bool, optional):
+            Whether to download the dataset if it is not found at root path. (default: ``False``).
     """

     _ext_audio = ".wav"
...
@@ -68,6 +75,14 @@ class LJSPEECH(Dataset):
         self._walker = list(walker)

     def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str]:
+        """Load the n-th sample from the dataset.
+
+        Args:
+            n (int): The index of the sample to be loaded
+
+        Returns:
+            tuple: ``(waveform, sample_rate, transcript, normalized_transcript)``
+        """
         line = self._walker[n]
         return load_ljspeech_item(line, self._path, self._ext_audio)
...
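A quick sketch of the documented ``(waveform, sample_rate, transcript, normalized_transcript)`` tuple (root path illustrative):

    import torchaudio

    dataset = torchaudio.datasets.LJSPEECH("./data", download=True)
    waveform, sample_rate, transcript, normalized_transcript = dataset[0]
    print(transcript)
    print(normalized_transcript)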
torchaudio/datasets/speechcommands.py
...
@@ -36,9 +36,18 @@ def load_speechcommands_item(filepath: str, path: str) -> Tuple[Tensor, int, str
 class SPEECHCOMMANDS(Dataset):
-    """
-    Create a Dataset for Speech Commands. Each item is a tuple of the form:
-    waveform, sample_rate, label, speaker_id, utterance_number
+    """Create a Dataset for Speech Commands.
+
+    Args:
+        root (str): Path to the directory where the dataset is found or downloaded.
+        url (str, optional): The URL to download the dataset from,
+            or the type of the dataset to download.
+            Allowed type values are ``"speech_commands_v0.01"`` and ``"speech_commands_v0.02"``
+            (default: ``"speech_commands_v0.02"``)
+        folder_in_archive (str, optional):
+            The top-level directory of the dataset. (default: ``"SpeechCommands"``)
+        download (bool, optional):
+            Whether to download the dataset if it is not found at root path. (default: ``False``).
     """

     def __init__(self,
...
@@ -75,6 +84,14 @@ class SPEECHCOMMANDS(Dataset):
         self._walker = list(walker)

     def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, int]:
+        """Load the n-th sample from the dataset.
+
+        Args:
+            n (int): The index of the sample to be loaded
+
+        Returns:
+            tuple: ``(waveform, sample_rate, label, speaker_id, utterance_number)``
+        """
         fileid = self._walker[n]
         return load_speechcommands_item(fileid, self._path)
...
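A minimal sketch of reading one documented item; the root path is illustrative and ``download=True`` fetches version 0.02 by default:

    import torchaudio

    dataset = torchaudio.datasets.SPEECHCOMMANDS("./data", download=True)
    waveform, sample_rate, label, speaker_id, utterance_number = dataset[0]
    print(label, speaker_id, utterance_number, waveform.shape)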
torchaudio/datasets/tedlium.py
...
@@ -43,44 +43,21 @@ _RELEASE_CONFIGS = {
 class TEDLIUM(Dataset):
-    """
-    Create a Dataset for Tedlium. It supports releases 1, 2 and 3, each item is a list containing:
-    [waveform, sample_rate, transcript, talk_id, speaker_id, identifier].
-
-    Constructor arguments:
+    """Create a Dataset for Tedlium. It supports releases 1, 2 and 3.

     Args:
-        root (str): Path containing dataset or target path where its downloaded if needed
-        release (str, optional): TEDLIUM identifier (release1,release2,release3). Defaults to RELEASE.
-        subset (str, optional): train/dev/test for releases 1&2, None for release3. Defaults to Train/None
-        download (bool, optional): Download dataset in case it is not found in root path. Defaults to False.
-        audio_ext (str, optional): Overwrite audio extension when loading items. Defaults to ".sph".
-
-    Special functions:
-        _load_tedlium_item: Loads a TEDLIUM dataset sample given a file name and corresponding sentence name
-        _load_audio: Default load function used in TEDLIUM dataset, you can overwrite this function to customize
-            functionality and load individual sentences from a full ted audio talk file
-        get_phoneme_dict: Returns the phoneme dictionary of a TEDLIUM release
+        root (str): Path to the directory where the dataset is found or downloaded.
+        release (str, optional): Release version.
+            Allowed values are ``"release1"``, ``"release2"`` or ``"release3"``.
+            (default: ``"release1"``).
+        subset (str, optional): The subset of the dataset to use. Valid options are ``"train"``, ``"dev"``,
+            and ``"test"`` for releases 1&2, ``None`` for release3. Defaults to ``"train"`` or ``None``.
+        download (bool, optional):
+            Whether to download the dataset if it is not found at root path. (default: ``False``).
     """

     def __init__(
         self, root: str, release: str = "release1", subset: str = None, download: bool = False, audio_ext=".sph"
     ) -> None:
-        """Constructor for TEDLIUM dataset.
-
-        Args:
-            root (str): Path containing dataset or target path where its downloaded if needed
-            release (str, optional): TEDLIUM identifier (release1,release2,release3). Defaults to RELEASE.
-            subset (str, optional): train/dev/test for releases 1&2, None for release3. Defaults to Train/None
-            download (bool, optional): Download dataset in case it is not found in root path. Defaults to False.
-            audio_ext (str, optional): Overwrite audio extension when loading items. Defaults to ".sph".
-
-        Raises:
-            RuntimeError: If release identifier does not match any supported release,
-        """
         self._ext_audio = audio_ext
         if release in _RELEASE_CONFIGS.keys():
             folder_in_archive = _RELEASE_CONFIGS[release]["folder_in_archive"]
...
@@ -140,7 +117,7 @@ class TEDLIUM(Dataset):
             path (str): Dataset root path

         Returns:
-            Tedlium_item: A namedTuple containing [waveform, sample_rate, transcript, talk_id, speaker_id, identifier]
+            tuple: ``(waveform, sample_rate, transcript, talk_id, speaker_id, identifier)``
         """
         transcript_path = os.path.join(path, "stm", fileid)
         with open(transcript_path + ".stm") as f:
...
@@ -171,14 +148,13 @@ class TEDLIUM(Dataset):
         return torchaudio.load(path)[:, start_time:end_time]

     def __getitem__(self, n: int) -> Tuple[Tensor, int, str, int, int, int]:
-        """TEDLIUM dataset custom function overwriting the default load behaviour.
-        Loads a TEDLIUM sample given an index n.
+        """Load the n-th sample from the dataset.

         Args:
-            n (int): Index of sample to be loaded
+            n (int): The index of the sample to be loaded

         Returns:
-            Tedlium_item: A namedTuple containing [waveform, sample_rate, transcript, talk_id, speaker_id, identifier]
+            tuple: ``(waveform, sample_rate, transcript, talk_id, speaker_id, identifier)``
         """
         fileid, line = self._filelist[n]
         return self._load_tedlium_item(fileid, line, self._path)
...
@@ -193,10 +169,8 @@ class TEDLIUM(Dataset):
     @property
     def phoneme_dict(self):
-        """Returns the phoneme dictionary of a TEDLIUM release.
-
-        Returns:
-            dictionary: Phoneme dictionary for the current tedlium release
-        """
+        """dict[str, tuple[str]]: Phonemes. Mapping from word to tuple of phonemes.
+        Note that some words have empty phonemes.
+        """
         # Read phoneme dictionary
         if not self._phoneme_dict:
...
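A sketch of the release/subset arguments and the ``phoneme_dict`` property documented above; the root path, release 1, and the ``"train"`` subset are illustrative choices:

    import torchaudio

    dataset = torchaudio.datasets.TEDLIUM("./data", release="release1", subset="train", download=True)
    waveform, sample_rate, transcript, talk_id, speaker_id, identifier = dataset[0]
    # Mapping from word to a tuple of phonemes; some entries are empty.
    phonemes = dataset.phoneme_dict
    print(talk_id, transcript)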
torchaudio/datasets/vctk.py
...
@@ -54,12 +54,25 @@ def load_vctk_item(fileid: str,
 class VCTK(Dataset):
-    """
-    Create a Dataset for VCTK. Each item is a tuple of the form:
-    (waveform, sample_rate, utterance, speaker_id, utterance_id)
-
-    Folder `p315` will be ignored due to the non-existent corresponding text files.
-    For more information about the dataset visit: https://datashare.is.ed.ac.uk/handle/10283/3443
+    """Create a Dataset for VCTK.
+
+    Note:
+        * **This dataset is no longer publicly available.** Please use :py:class:`VCTK_092`
+        * Directory ``p315`` is ignored because there are no corresponding text files.
+
+    For more information about the dataset visit: https://datashare.is.ed.ac.uk/handle/10283/3443
+
+    Args:
+        root (str): Path to the directory where the dataset is found or downloaded.
+        url (str, optional): Not used as the dataset is no longer publicly available.
+        folder_in_archive (str, optional):
+            The top-level directory of the dataset. (default: ``"VCTK-Corpus"``)
+        download (bool, optional):
+            Whether to download the dataset if it is not found at root path. (default: ``False``).
+            Giving ``download=True`` will result in an error as the dataset is no longer
+            publicly available.
+        downsample (bool, optional): Not used.
+        transform (callable, optional): Optional transform applied on waveform. (default: ``None``)
+        target_transform (callable, optional): Optional transform applied on utterance. (default: ``None``)
     """

     _folder_txt = "txt"
...
@@ -118,6 +131,14 @@ class VCTK(Dataset):
         self._walker = list(walker)

     def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, str]:
+        """Load the n-th sample from the dataset.
+
+        Args:
+            n (int): The index of the sample to be loaded
+
+        Returns:
+            tuple: ``(waveform, sample_rate, utterance, speaker_id, utterance_id)``
+        """
         fileid = self._walker[n]
         item = load_vctk_item(
             fileid,
...
@@ -145,14 +166,13 @@ class VCTK(Dataset):
 class VCTK_092(Dataset):
     """Create VCTK 0.92 Dataset

-    An item is a ``namedtuple`` of (``waveform``, ``sample_rate``, ``utterance``,
-    ``speaker_id``, ``utterance_id``)
-
     Args:
         root (str): Root directory where the dataset's top level directory is found.
-        mic_id (str): Microphone ID. Either ``"mic1"`` or ``"mic2"``
-        download (bool, optional): Download the dataset if not found in the given directory.
-        url (str, optional): URL from which the dataset is downloaded.
+        mic_id (str): Microphone ID. Either ``"mic1"`` or ``"mic2"``. (default: ``"mic2"``)
+        download (bool, optional):
+            Whether to download the dataset if it is not found at root path. (default: ``False``).
+        url (str, optional): The URL to download the dataset from.
+            (default: ``"https://datashare.is.ed.ac.uk/bitstream/handle/10283/3443/VCTK-Corpus-0.92.zip"``)
         audio_ext (str, optional): Custom audio extension if dataset is converted to non-default audio format.

     Note:
...
@@ -252,6 +272,14 @@ class VCTK_092(Dataset):
         return Sample(waveform, sample_rate, utterance, speaker_id, utterance_id)

     def __getitem__(self, n: int) -> Sample:
+        """Load the n-th sample from the dataset.
+
+        Args:
+            n (int): The index of the sample to be loaded
+
+        Returns:
+            tuple: ``(waveform, sample_rate, utterance, speaker_id, utterance_id)``
+        """
         speaker_id, utterance_id = self._sample_ids[n]
         return self._load_sample(speaker_id, utterance_id, self._mic_id)
...
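Since the original VCTK archive is no longer downloadable, a usage sketch is shown for ``VCTK_092`` instead; the root path and microphone choice are illustrative:

    import torchaudio

    # mic_id selects which of the two microphone recordings is loaded.
    dataset = torchaudio.datasets.VCTK_092("./data", mic_id="mic2", download=True)
    waveform, sample_rate, utterance, speaker_id, utterance_id = dataset[0]
    print(speaker_id, utterance_id, utterance)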
torchaudio/datasets/yesno.py
...
@@ -31,9 +31,18 @@ def load_yesno_item(fileid: str, path: str, ext_audio: str) -> Tuple[Tensor, int
 class YESNO(Dataset):
-    """
-    Create a Dataset for YesNo. Each item is a tuple of the form:
-    (waveform, sample_rate, labels)
+    """Create a Dataset for YesNo.
+
+    Args:
+        root (str): Path to the directory where the dataset is found or downloaded.
+        url (str, optional): The URL to download the dataset from.
+            (default: ``"http://www.openslr.org/resources/1/waves_yesno.tar.gz"``)
+        folder_in_archive (str, optional):
+            The top-level directory of the dataset. (default: ``"waves_yesno"``)
+        download (bool, optional):
+            Whether to download the dataset if it is not found at root path. (default: ``False``).
+        transform (callable, optional): Optional transform applied on waveform. (default: ``None``)
+        target_transform (callable, optional): Optional transform applied on utterance. (default: ``None``)
     """

     _ext_audio = ".wav"
...
@@ -78,6 +87,14 @@ class YESNO(Dataset):
         self._walker = list(walker)

     def __getitem__(self, n: int) -> Tuple[Tensor, int, List[int]]:
+        """Load the n-th sample from the dataset.
+
+        Args:
+            n (int): The index of the sample to be loaded
+
+        Returns:
+            tuple: ``(waveform, sample_rate, labels)``
+        """
         fileid = self._walker[n]
         item = load_yesno_item(fileid, self._path, self._ext_audio)
...
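YesNo is small enough for a quick end-to-end check of the documented item layout (root path illustrative):

    import torchaudio

    dataset = torchaudio.datasets.YESNO("./data", download=True)
    waveform, sample_rate, labels = dataset[0]
    # labels is a list of 0/1 integers, one per spoken "no"/"yes" in the recording.
    print(labels, waveform.shape, sample_rate)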