"router/vscode:/vscode.git/clone" did not exist on "e36dfaa8de2d9a9fa67eeed5ce64fd5949916c99"
wsj0mix.py 2.44 KB
Newer Older
1
from pathlib import Path
2
from typing import List, Tuple, Union
3
4
5

import torch
import torchaudio
6
from torch.utils.data import Dataset
7
8
9
10
11
12
13
14
15
16
17
18
19
20

# One dataset item: (sample_rate, mixed waveform, list of per-speaker source waveforms).
SampleType = Tuple[int, torch.Tensor, List[torch.Tensor]]


class WSJ0Mix(Dataset):
    """Create a Dataset for wsj0-mix.

    The dataset root is expected to contain a ``mix`` directory holding the
    mixture audio and one ``s1`` ... ``sN`` directory per speaker holding the
    corresponding source audio under the same file names.

    Args:
        root (str or Path): Path to the directory where the dataset is found.
        num_speakers (int): The number of speakers, which determines the directories
            to traverse. The Dataset will traverse ``s1`` to ``sN`` directories to collect
            N source audios.
        sample_rate (int): Expected sample rate of audio files. If any of the audio has a
            different sample rate, raises ``ValueError`` when that sample is loaded.
        audio_ext (str, optional): The extension of audio files to find. (default: ".wav")
    """

    def __init__(
        self,
        root: Union[str, Path],
        num_speakers: int,
        sample_rate: int,
        audio_ext: str = ".wav",
    ):
        self.root = Path(root)
        self.sample_rate = sample_rate
        self.mix_dir = (self.root / "mix").resolve()
        self.src_dirs = [(self.root / f"s{i+1}").resolve() for i in range(num_speakers)]

        # Keep bare file names only: the same name is looked up in ``mix`` and
        # in every source directory. Sorting makes the index order deterministic.
        self.files = sorted(p.name for p in self.mix_dir.glob(f"*{audio_ext}"))

    def _load_audio(self, path: str) -> torch.Tensor:
        """Load one audio file and check its sample rate.

        Args:
            path (str): Path of the audio file to load.

        Returns:
            torch.Tensor: The waveform returned by ``torchaudio.load``.

        Raises:
            ValueError: If the file's sample rate differs from ``self.sample_rate``.
        """
        waveform, sample_rate = torchaudio.load(path)
        if sample_rate != self.sample_rate:
            raise ValueError(
                f"The dataset contains audio file of sample rate {sample_rate}, "
                f"but the requested sample rate is {self.sample_rate}."
            )
        return waveform

    def _load_sample(self, filename: str) -> SampleType:
        """Load the mixture and every source waveform stored under ``filename``.

        Args:
            filename (str): File name shared by the mixture and source directories.

        Returns:
            tuple: ``(sample_rate, mix_waveform, list_of_source_waveforms)``

        Raises:
            ValueError: If a source waveform's shape differs from the mixture's,
                or if any file has an unexpected sample rate.
        """
        mixed = self._load_audio(str(self.mix_dir / filename))
        srcs = []
        for i, dir_ in enumerate(self.src_dirs):
            src = self._load_audio(str(dir_ / filename))
            # Every source must line up with the mixture exactly.
            if mixed.shape != src.shape:
                raise ValueError(f"Different waveform shapes. mixed: {mixed.shape}, src[{i}]: {src.shape}")
            srcs.append(src)
        return self.sample_rate, mixed, srcs

    def __len__(self) -> int:
        """Return the number of mixture files found in the ``mix`` directory."""
        return len(self.files)

    def __getitem__(self, key: int) -> SampleType:
        """Load the n-th sample from the dataset.

        Args:
            key (int): The index of the sample to be loaded

        Returns:
            tuple: ``(sample_rate, mix_waveform, list_of_source_waveforms)``
        """
        return self._load_sample(self.files[key])