Commit 1b444d87 authored by Caroline Chen
Browse files

Add iemocap variants (#2778)

Summary:
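Add the ability to load only improvised or only scripted utterances. The "exc" label is also no longer merged into "hap", and "exc" and "fru" are now accepted as labels.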

Pull Request resolved: https://github.com/pytorch/audio/pull/2778

Reviewed By: nateanl

Differential Revision: D40511865

Pulled By: carolineechen

fbshipit-source-id: e1fe3908ac2aa306ad30c242ddd25762b2268539
parent ee68a982
...@@ -4,7 +4,7 @@ import random ...@@ -4,7 +4,7 @@ import random
from torchaudio.datasets import iemocap from torchaudio.datasets import iemocap
from torchaudio_unittest.common_utils import get_whitenoise, save_wav, TempDirMixin, TorchaudioTestCase from torchaudio_unittest.common_utils import get_whitenoise, save_wav, TempDirMixin, TorchaudioTestCase
LABELS = ["neu", "hap", "ang", "sad", "exc", "xxx"] LABELS = ["neu", "hap", "ang", "sad", "exc", "fru", "xxx"]
SAMPLE_RATE = 16000 SAMPLE_RATE = 16000
...@@ -21,8 +21,6 @@ def _save_wav(filepath: str, seed: int): ...@@ -21,8 +21,6 @@ def _save_wav(filepath: str, seed: int):
def _save_label(label_folder: str, filename: str, wav_stem: str): def _save_label(label_folder: str, filename: str, wav_stem: str):
label = random.choice(LABELS) label = random.choice(LABELS)
if label == "exc":
label = "hap"
line = f"[xxx]\t{wav_stem}\t{label}\t[yyy]" line = f"[xxx]\t{wav_stem}\t{label}\t[yyy]"
filepath = os.path.join(label_folder, filename) filepath = os.path.join(label_folder, filename)
...@@ -40,19 +38,22 @@ def _get_samples(dataset_dir: str, session: int): ...@@ -40,19 +38,22 @@ def _get_samples(dataset_dir: str, session: int):
os.makedirs(wav_folder, exist_ok=True) os.makedirs(wav_folder, exist_ok=True)
os.makedirs(label_folder, exist_ok=True) os.makedirs(label_folder, exist_ok=True)
samples = []
wav_stems = [] wav_stems = []
for i in range(5): for i in range(5):
for g in ["F", "M"]: for g in ["F", "M"]:
speaker = f"Ses0{session}{g}" for utt in ["impro", "script"]:
subfolder = f"{speaker}_impro0{i}" speaker = f"Ses0{session}{g}"
subfolder_path = os.path.join(wav_folder, subfolder) subfolder = f"{speaker}_{utt}0{i}"
os.makedirs(subfolder_path, exist_ok=True) subfolder_path = os.path.join(wav_folder, subfolder)
os.makedirs(subfolder_path, exist_ok=True)
for j in range(5):
wav_stem = f"{subfolder}_F00{j}" for j in range(5):
wav_stems.append(wav_stem) wav_stem = f"{subfolder}_F00{j}"
wav_stems.append(wav_stem)
all_samples = []
impro_samples = []
script_samples = []
wav_stems = sorted(wav_stems) wav_stems = sorted(wav_stems)
for wav_stem in wav_stems: for wav_stem in wav_stems:
subfolder = wav_stem[:-5] subfolder = wav_stem[:-5]
...@@ -64,31 +65,43 @@ def _get_samples(dataset_dir: str, session: int): ...@@ -64,31 +65,43 @@ def _get_samples(dataset_dir: str, session: int):
if label == "xxx": if label == "xxx":
continue continue
sample = (wav, SAMPLE_RATE, wav_stem, label, speaker) sample = (wav, SAMPLE_RATE, wav_stem, label, speaker)
samples.append(sample) all_samples.append(sample)
if "impro" in subfolder:
impro_samples.append(sample)
else:
script_samples.append(sample)
return samples return all_samples, script_samples, impro_samples
def get_mock_dataset(dataset_dir: str): def get_mock_dataset(dataset_dir: str):
os.makedirs(dataset_dir, exist_ok=True) os.makedirs(dataset_dir, exist_ok=True)
samples = [] all_samples = []
script_samples = []
impro_samples = []
for session in range(1, 4): for session in range(1, 4):
samples += _get_samples(dataset_dir, session) samples = _get_samples(dataset_dir, session)
return samples all_samples += samples[0]
script_samples += samples[1]
impro_samples += samples[2]
return all_samples, script_samples, impro_samples
class TestIemocap(TempDirMixin, TorchaudioTestCase): class TestIemocap(TempDirMixin, TorchaudioTestCase):
root_dir = None root_dir = None
backend = "default" backend = "default"
samples = [] all_samples = []
script_samples = []
impro_samples = []
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.root_dir = cls.get_base_temp_dir() cls.root_dir = cls.get_base_temp_dir()
dataset_dir = os.path.join(cls.root_dir, "IEMOCAP") dataset_dir = os.path.join(cls.root_dir, "IEMOCAP")
cls.samples = get_mock_dataset(dataset_dir) cls.all_samples, cls.script_samples, cls.impro_samples = get_mock_dataset(dataset_dir)
def _testIEMOCAP(self, dataset, samples): def _testIEMOCAP(self, dataset, samples):
num_samples = 0 num_samples = 0
...@@ -98,6 +111,14 @@ class TestIemocap(TempDirMixin, TorchaudioTestCase): ...@@ -98,6 +111,14 @@ class TestIemocap(TempDirMixin, TorchaudioTestCase):
assert num_samples == len(samples) assert num_samples == len(samples)
def testIEMOCAPDataset(self): def testIEMOCAPFullDataset(self):
dataset = iemocap.IEMOCAP(self.root_dir) dataset = iemocap.IEMOCAP(self.root_dir)
self._testIEMOCAP(dataset, self.samples) self._testIEMOCAP(dataset, self.all_samples)
def testIEMOCAPScriptedDataset(self):
dataset = iemocap.IEMOCAP(self.root_dir, utterance_type="scripted")
self._testIEMOCAP(dataset, self.script_samples)
def testIEMOCAPImprovisedDataset(self):
dataset = iemocap.IEMOCAP(self.root_dir, utterance_type="improvised")
self._testIEMOCAP(dataset, self.impro_samples)
import os import os
import re import re
from pathlib import Path from pathlib import Path
from typing import Tuple, Union from typing import Optional, Tuple, Union
from torch import Tensor from torch import Tensor
from torch.utils.data import Dataset from torch.utils.data import Dataset
...@@ -28,12 +28,16 @@ class IEMOCAP(Dataset): ...@@ -28,12 +28,16 @@ class IEMOCAP(Dataset):
Args: Args:
root (str or Path): Root directory where the dataset's top level directory is found root (str or Path): Root directory where the dataset's top level directory is found
sessions (Tuple[int]): Tuple of sessions (1-5) to use. (Default: ``(1, 2, 3, 4, 5)``) sessions (Tuple[int]): Tuple of sessions (1-5) to use. (Default: ``(1, 2, 3, 4, 5)``)
utterance_type (str or None, optional): Which type(s) of utterances to include in the dataset.
Options: ("scripted", "improvised", ``None``). If ``None``, both scripted and improvised
data are used.
""" """
def __init__( def __init__(
self, self,
root: Union[str, Path], root: Union[str, Path],
sessions: Tuple[str] = (1, 2, 3, 4, 5), sessions: Tuple[str] = (1, 2, 3, 4, 5),
utterance_type: Optional[str] = None,
): ):
root = Path(root) root = Path(root)
self._path = root / "IEMOCAP" self._path = root / "IEMOCAP"
...@@ -41,6 +45,9 @@ class IEMOCAP(Dataset): ...@@ -41,6 +45,9 @@ class IEMOCAP(Dataset):
if not os.path.isdir(self._path): if not os.path.isdir(self._path):
raise RuntimeError("Dataset not found.") raise RuntimeError("Dataset not found.")
if utterance_type not in ["scripted", "improvised", None]:
raise ValueError("utterance_type must be one of ['scripted', 'improvised', or None]")
all_data = [] all_data = []
self.data = [] self.data = []
self.mapping = {} self.mapping = {}
...@@ -57,7 +64,12 @@ class IEMOCAP(Dataset): ...@@ -57,7 +64,12 @@ class IEMOCAP(Dataset):
# add labels # add labels
label_dir = session_dir / "dialog" / "EmoEvaluation" label_dir = session_dir / "dialog" / "EmoEvaluation"
label_paths = label_dir.glob("*.txt") query = "*.txt"
if utterance_type == "scripted":
query = "*script*.txt"
elif utterance_type == "improvised":
query = "*impro*.txt"
label_paths = label_dir.glob(query)
for label_path in label_paths: for label_path in label_paths:
with open(label_path, "r") as f: with open(label_path, "r") as f:
...@@ -67,11 +79,9 @@ class IEMOCAP(Dataset): ...@@ -67,11 +79,9 @@ class IEMOCAP(Dataset):
line = re.split("[\t\n]", line) line = re.split("[\t\n]", line)
wav_stem = line[1] wav_stem = line[1]
label = line[2] label = line[2]
if label == "exc":
label = "hap"
if wav_stem not in all_data: if wav_stem not in all_data:
continue continue
if label not in ["neu", "hap", "ang", "sad"]: if label not in ["neu", "hap", "ang", "sad", "exc", "fru"]:
continue continue
self.mapping[wav_stem] = {} self.mapping[wav_stem] = {}
self.mapping[wav_stem]["label"] = label self.mapping[wav_stem]["label"] = label
...@@ -99,7 +109,7 @@ class IEMOCAP(Dataset): ...@@ -99,7 +109,7 @@ class IEMOCAP(Dataset):
str: str:
File name File name
str: str:
Label (one of ``"neu"``, ``"hap"``, ``"ang"``, ``"sad"``) Label (one of ``"neu"``, ``"hap"``, ``"ang"``, ``"sad"``, ``"exc"``, ``"fru"``)
str: str:
Speaker Speaker
""" """
...@@ -125,7 +135,7 @@ class IEMOCAP(Dataset): ...@@ -125,7 +135,7 @@ class IEMOCAP(Dataset):
str: str:
File name File name
str: str:
Label (one of ``"neu"``, ``"hap"``, ``"ang"``, ``"sad"``) Label (one of ``"neu"``, ``"hap"``, ``"ang"``, ``"sad"``, ``"exc"``, ``"fru"``)
str: str:
Speaker Speaker
""" """
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment