Commit 34255386 authored by Caroline Chen's avatar Caroline Chen Committed by Facebook GitHub Bot
Browse files

Add iemocap variants (#2778)

Summary:
add ability to load only improvised or only scripted utterances.

Pull Request resolved: https://github.com/pytorch/audio/pull/2778

Reviewed By: nateanl

Differential Revision: D40511865

Pulled By: carolineechen

fbshipit-source-id: e1fe3908ac2aa306ad30c242ddd25762b2268539
parent 9135b544
......@@ -4,7 +4,7 @@ import random
from torchaudio.datasets import iemocap
from torchaudio_unittest.common_utils import get_whitenoise, save_wav, TempDirMixin, TorchaudioTestCase
LABELS = ["neu", "hap", "ang", "sad", "exc", "xxx"]
LABELS = ["neu", "hap", "ang", "sad", "exc", "fru", "xxx"]
SAMPLE_RATE = 16000
......@@ -21,8 +21,6 @@ def _save_wav(filepath: str, seed: int):
def _save_label(label_folder: str, filename: str, wav_stem: str):
label = random.choice(LABELS)
if label == "exc":
label = "hap"
line = f"[xxx]\t{wav_stem}\t{label}\t[yyy]"
filepath = os.path.join(label_folder, filename)
......@@ -40,12 +38,12 @@ def _get_samples(dataset_dir: str, session: int):
os.makedirs(wav_folder, exist_ok=True)
os.makedirs(label_folder, exist_ok=True)
samples = []
wav_stems = []
for i in range(5):
for g in ["F", "M"]:
for utt in ["impro", "script"]:
speaker = f"Ses0{session}{g}"
subfolder = f"{speaker}_impro0{i}"
subfolder = f"{speaker}_{utt}0{i}"
subfolder_path = os.path.join(wav_folder, subfolder)
os.makedirs(subfolder_path, exist_ok=True)
......@@ -53,6 +51,9 @@ def _get_samples(dataset_dir: str, session: int):
wav_stem = f"{subfolder}_F00{j}"
wav_stems.append(wav_stem)
all_samples = []
impro_samples = []
script_samples = []
wav_stems = sorted(wav_stems)
for wav_stem in wav_stems:
subfolder = wav_stem[:-5]
......@@ -64,31 +65,43 @@ def _get_samples(dataset_dir: str, session: int):
if label == "xxx":
continue
sample = (wav, SAMPLE_RATE, wav_stem, label, speaker)
samples.append(sample)
all_samples.append(sample)
return samples
if "impro" in subfolder:
impro_samples.append(sample)
else:
script_samples.append(sample)
return all_samples, script_samples, impro_samples
def get_mock_dataset(dataset_dir: str):
    """Generate a mock IEMOCAP directory tree and return the expected samples.

    Creates ``dataset_dir`` if needed, builds mock session data via
    ``_get_samples`` and aggregates the per-session sample lists.

    Args:
        dataset_dir (str): Path at which to create the mock dataset root.

    Returns:
        Tuple[list, list, list]: ``(all_samples, script_samples, impro_samples)``,
        each a list of ``(waveform, sample_rate, wav_stem, label, speaker)``
        tuples as produced by ``_get_samples``.
    """
    os.makedirs(dataset_dir, exist_ok=True)
    all_samples = []
    script_samples = []
    impro_samples = []
    # NOTE(review): only sessions 1-3 are mocked (range(1, 4)) even though the
    # real dataset has 5 sessions — presumably to keep the fixture small; confirm.
    for session in range(1, 4):
        samples = _get_samples(dataset_dir, session)
        all_samples += samples[0]
        script_samples += samples[1]
        impro_samples += samples[2]
    return all_samples, script_samples, impro_samples
class TestIemocap(TempDirMixin, TorchaudioTestCase):
root_dir = None
backend = "default"
samples = []
all_samples = []
script_samples = []
impro_samples = []
@classmethod
def setUpClass(cls):
    """Build the mock IEMOCAP dataset once and cache the expected samples.

    Populates ``cls.all_samples``, ``cls.script_samples`` and
    ``cls.impro_samples`` for use by the individual test methods.
    """
    cls.root_dir = cls.get_base_temp_dir()
    # The dataset loader expects an "IEMOCAP" directory under the given root.
    dataset_dir = os.path.join(cls.root_dir, "IEMOCAP")
    cls.all_samples, cls.script_samples, cls.impro_samples = get_mock_dataset(dataset_dir)
def _testIEMOCAP(self, dataset, samples):
num_samples = 0
......@@ -98,6 +111,14 @@ class TestIemocap(TempDirMixin, TorchaudioTestCase):
assert num_samples == len(samples)
def testIEMOCAPFullDataset(self):
    """With no utterance_type, the dataset yields both scripted and improvised samples."""
    dataset = iemocap.IEMOCAP(self.root_dir)
    self._testIEMOCAP(dataset, self.all_samples)
def testIEMOCAPScriptedDataset(self):
    """Restricting to utterance_type="scripted" yields exactly the scripted samples."""
    self._testIEMOCAP(
        iemocap.IEMOCAP(self.root_dir, utterance_type="scripted"),
        self.script_samples,
    )
def testIEMOCAPImprovisedDataset(self):
    """Restricting to utterance_type="improvised" yields exactly the improvised samples."""
    self._testIEMOCAP(
        iemocap.IEMOCAP(self.root_dir, utterance_type="improvised"),
        self.impro_samples,
    )
import os
import re
from pathlib import Path
from typing import Tuple, Union
from typing import Optional, Tuple, Union
from torch import Tensor
from torch.utils.data import Dataset
......@@ -28,12 +28,16 @@ class IEMOCAP(Dataset):
Args:
root (str or Path): Root directory where the dataset's top level directory is found
sessions (Tuple[int]): Tuple of sessions (1-5) to use. (Default: ``(1, 2, 3, 4, 5)``)
utterance_type (str or None, optional): Which type(s) of utterances to include in the dataset.
Options: ("scripted", "improvised", ``None``). If ``None``, both scripted and improvised
data are used.
"""
def __init__(
self,
root: Union[str, Path],
sessions: Tuple[str] = (1, 2, 3, 4, 5),
utterance_type: Optional[str] = None,
):
root = Path(root)
self._path = root / "IEMOCAP"
......@@ -41,6 +45,9 @@ class IEMOCAP(Dataset):
if not os.path.isdir(self._path):
raise RuntimeError("Dataset not found.")
if utterance_type not in ["scripted", "improvised", None]:
raise ValueError("utterance_type must be one of ['scripted', 'improvised', or None]")
all_data = []
self.data = []
self.mapping = {}
......@@ -57,7 +64,12 @@ class IEMOCAP(Dataset):
# add labels
label_dir = session_dir / "dialog" / "EmoEvaluation"
label_paths = label_dir.glob("*.txt")
query = "*.txt"
if utterance_type == "scripted":
query = "*script*.txt"
elif utterance_type == "improvised":
query = "*impro*.txt"
label_paths = label_dir.glob(query)
for label_path in label_paths:
with open(label_path, "r") as f:
......@@ -67,11 +79,9 @@ class IEMOCAP(Dataset):
line = re.split("[\t\n]", line)
wav_stem = line[1]
label = line[2]
if label == "exc":
label = "hap"
if wav_stem not in all_data:
continue
if label not in ["neu", "hap", "ang", "sad"]:
if label not in ["neu", "hap", "ang", "sad", "exc", "fru"]:
continue
self.mapping[wav_stem] = {}
self.mapping[wav_stem]["label"] = label
......@@ -99,7 +109,7 @@ class IEMOCAP(Dataset):
str:
File name
str:
Label (one of ``"neu"``, ``"hap"``, ``"ang"``, ``"sad"``)
Label (one of ``"neu"``, ``"hap"``, ``"ang"``, ``"sad"``, ``"exc"``, ``"fru"``)
str:
Speaker
"""
......@@ -125,7 +135,7 @@ class IEMOCAP(Dataset):
str:
File name
str:
Label (one of ``"neu"``, ``"hap"``, ``"ang"``, ``"sad"``)
Label (one of ``"neu"``, ``"hap"``, ``"ang"``, ``"sad"``, ``"exc"``, ``"fru"``)
str:
Speaker
"""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment