normalise some more

b73698c1 · sanchit-gandhi · b62f9f1e · b73698c1
Commit b73698c1 authored Feb 16, 2024 by sanchit-gandhi
Hide whitespace changes
Inline Side-by-side

Showing with 11 additions and 3 deletions

run_audio_classification.py run_audio_classification.py +11 -3

No files found.
--- a/run_audio_classification.py
+++ b/run_audio_classification.py
@@ -57,19 +57,27 @@ def random_subsample(wav: np.ndarray, max_length: float, sample_rate: int = 1600
    return wav[random_offset : random_offset + sample_length]
+ACCENT_MAPPING = {
+    "British": "English",
+    "Canadian": "American",
+    "Northern irish": "Irish",
+    "New zealand": "Australian",
+    "Pakistani": "Indian",
+}
 def preprocess_labels(label: str) -> str:
    """Apply pre-processing formatting to the accent labels"""
    if "_" in label:
        # voxpopuli stylises the accent as a language code (e.g. en_pl for "polish") - convert to full accent
        # by looking up the part after the last "_" in LANGUAGES
        language_code = label.split("_")[-1]
        label = LANGUAGES[language_code]
-    if label == "British":
-        # 1 speaker in VCTK is labelled as British instead of English - let's normalise
-        label = "English"
    # VCTK labels for two words are concatenated into one (NewZealand -> New Zealand)
    label = re.sub(r"(\w)([A-Z])", r"\1 \2", label)
    # convert Whisper language name (polish) to capitalised (Polish); note that capitalize() also
    # lower-cases everything after the first character (New Zealand -> New zealand)
    label = label.capitalize()
    # merge accents listed in ACCENT_MAPPING into their target label; its keys are written in the
    # capitalize()d form produced above (e.g. "New zealand", "Northern irish")
+    if label in ACCENT_MAPPING:
+        label = ACCENT_MAPPING[label]
    return label