"git@developer.sourcefind.cn:OpenDAS/ollama.git" did not exist on "1108d8b34e43e968812eded0ccda73503ccad77d"
Unverified commit 32bae85c, authored by Taras Sereda, committed by GitHub
Browse files

LJ Speech dataset (#439)

* LJ Speech dataset

* refactoring

as per @vincentqb's suggestions
parent 445e14d1
LJ001-0001|Printing, in the only sense with which we are at present concerned, differs from most if not from all the arts and crafts represented in the Exhibition|Printing, in the only sense with which we are at present concerned, differs from most if not from all the arts and crafts represented in the Exhibition
...@@ -6,6 +6,7 @@ from torchaudio.datasets.librispeech import LIBRISPEECH ...@@ -6,6 +6,7 @@ from torchaudio.datasets.librispeech import LIBRISPEECH
from torchaudio.datasets.utils import diskcache_iterator, bg_iterator from torchaudio.datasets.utils import diskcache_iterator, bg_iterator
from torchaudio.datasets.vctk import VCTK from torchaudio.datasets.vctk import VCTK
from torchaudio.datasets.yesno import YESNO from torchaudio.datasets.yesno import YESNO
from torchaudio.datasets.ljspeech import LJSPEECH
import common_utils import common_utils
...@@ -47,6 +48,10 @@ class TestDatasets(unittest.TestCase): ...@@ -47,6 +48,10 @@ class TestDatasets(unittest.TestCase):
for d in data: for d in data:
pass pass
def test_ljspeech(self):
data = LJSPEECH(self.path)
data[0]
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -3,12 +3,14 @@ from .librispeech import LIBRISPEECH ...@@ -3,12 +3,14 @@ from .librispeech import LIBRISPEECH
from .utils import bg_iterator, diskcache_iterator from .utils import bg_iterator, diskcache_iterator
from .vctk import VCTK from .vctk import VCTK
from .yesno import YESNO from .yesno import YESNO
from .ljspeech import LJSPEECH
__all__ = ( __all__ = (
"COMMONVOICE", "COMMONVOICE",
"LIBRISPEECH", "LIBRISPEECH",
"VCTK", "VCTK",
"YESNO", "YESNO",
"LJSPEECH",
"diskcache_iterator", "diskcache_iterator",
"bg_iterator", "bg_iterator",
) )
import os
import csv
import torchaudio
from torchaudio.datasets.utils import download_url, extract_archive, unicode_csv_reader
from torch.utils.data import Dataset
# Default archive location; used as the default for the ``url`` argument of
# LJSPEECH. Its basename determines the archive file name under ``root``.
URL = "https://data.keithito.com/data/speech/LJSpeech-1.1.tar.bz2"
# Default sub-folder (relative to the extracted dataset folder) that is joined
# onto the path the audio files are loaded from; default for
# ``folder_in_archive``.
FOLDER_IN_ARCHIVE = "wavs"
def load_ljspeech_item(line, path, ext_audio):
    """Load a single LJSpeech sample from one parsed metadata row.

    Args:
        line: Sequence of exactly three fields, in order: file id,
            transcript, normalized transcript.
        path: Directory that contains the audio files.
        ext_audio: Extension appended to the file id to form the audio
            file name.

    Returns:
        A tuple ``(waveform, sample_rate, transcript,
        normalized_transcript)``; the first two values are whatever
        ``torchaudio.load`` returns for the audio file.
    """
    # A metadata row must carry exactly the three expected fields.
    assert len(line) == 3
    utterance_id, raw_text, normalized_text = line

    # Audio files are named "<file id><extension>" inside ``path``.
    audio_file = os.path.join(path, utterance_id + ext_audio)
    audio, rate = torchaudio.load(audio_file)

    return audio, rate, raw_text, normalized_text
class LJSPEECH(Dataset):
    """
    Create a Dataset for LJSpeech-1.1. Each item is a tuple of the form:
    waveform, sample_rate, transcript, normalized_transcript

    Args:
        root: Directory in which the archive is stored and under which the
            extracted dataset folder is looked up.
        url: Location of the dataset archive. Its basename names the archive
            file and, with the archive extension removed, the dataset folder.
        folder_in_archive: Sub-folder of the dataset folder that holds the
            audio files.
        download: If true and the audio folder does not exist yet, download
            the archive (unless it is already present) and extract it.

    Raises:
        OSError: If ``metadata.csv`` cannot be opened (for example when the
            dataset is absent and ``download`` is false).
    """

    _ext_audio = ".wav"
    _ext_archive = '.tar.bz2'

    def __init__(
        self, root, url=URL, folder_in_archive=FOLDER_IN_ARCHIVE, download=False
    ):
        basename = os.path.basename(url)
        archive = os.path.join(root, basename)

        # Strip the archive extension to get the dataset folder name.
        basename = basename.split(self._ext_archive)[0]
        folder_in_archive = os.path.join(basename, folder_in_archive)

        self._path = os.path.join(root, folder_in_archive)
        self._metadata_path = os.path.join(root, basename, 'metadata.csv')

        # Only fetch/extract when the audio folder is missing; an archive
        # that is already on disk is reused instead of downloaded again.
        if download and not os.path.isdir(self._path):
            if not os.path.isfile(archive):
                download_url(url, root)
            extract_archive(archive)

        # Decode explicitly as UTF-8 (the encoding the dataset description
        # gives for the transcripts) instead of relying on the platform's
        # locale default, which can garble or reject non-ASCII text.
        with open(self._metadata_path, "r", encoding="utf-8") as metadata:
            walker = unicode_csv_reader(metadata, delimiter="|", quoting=csv.QUOTE_NONE)
            # Materialize the rows so items can be indexed and counted.
            self._walker = list(walker)

    def __getitem__(self, n):
        """Return the ``n``-th sample as
        ``(waveform, sample_rate, transcript, normalized_transcript)``."""
        line = self._walker[n]
        return load_ljspeech_item(line, self._path, self._ext_audio)

    def __len__(self):
        """Return the number of metadata rows that were read."""
        return len(self._walker)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment