import { phonemize as espeakng } from "phonemizer"; /** * Helper function to split a string on a regex, but keep the delimiters. * This is required, because the JavaScript `.split()` method does not keep the delimiters, * and wrapping in a capturing group causes issues with existing capturing groups (due to nesting). * @param {string} text The text to split. * @param {RegExp} regex The regex to split on. * @returns {{match: boolean; text: string}[]} The split string. */ function split(text, regex) { const result = []; let prev = 0; for (const match of text.matchAll(regex)) { const fullMatch = match[0]; if (prev < match.index) { result.push({ match: false, text: text.slice(prev, match.index) }); } if (fullMatch.length > 0) { result.push({ match: true, text: fullMatch }); } prev = match.index + fullMatch.length; } if (prev < text.length) { result.push({ match: false, text: text.slice(prev) }); } return result; } /** * Helper function to split numbers into phonetic equivalents * @param {string} match The matched number * @returns {string} The phonetic equivalent */ function split_num(match) { if (match.includes(".")) { return match; } else if (match.includes(":")) { let [h, m] = match.split(":").map(Number); if (m === 0) { return `${h} o'clock`; } else if (m < 10) { return `${h} oh ${m}`; } return `${h} ${m}`; } let year = parseInt(match.slice(0, 4), 10); if (year < 1100 || year % 1000 < 10) { return match; } let left = match.slice(0, 2); let right = parseInt(match.slice(2, 4), 10); let suffix = match.endsWith("s") ? "s" : ""; if (year % 1000 >= 100 && year % 1000 <= 999) { if (right === 0) { return `${left} hundred${suffix}`; } else if (right < 10) { return `${left} oh ${right}${suffix}`; } } return `${left} ${right}${suffix}`; } /** * Helper function to format monetary values * @param {string} match The matched currency * @returns {string} The formatted currency */ function flip_money(match) { const bill = match[0] === "$" ? "dollar" : "pound"; if (isNaN(Number(match.slice(1)))) { return `${match.slice(1)} ${bill}s`; } else if (!match.includes(".")) { let suffix = match.slice(1) === "1" ? "" : "s"; return `${match.slice(1)} ${bill}${suffix}`; } const [b, c] = match.slice(1).split("."); const d = parseInt(c.padEnd(2, "0"), 10); let coins = match[0] === "$" ? (d === 1 ? "cent" : "cents") : d === 1 ? "penny" : "pence"; return `${b} ${bill}${b === "1" ? "" : "s"} and ${d} ${coins}`; } /** * Helper function to process decimal numbers * @param {string} match The matched number * @returns {string} The formatted number */ function point_num(match) { let [a, b] = match.split("."); return `${a} point ${b.split("").join(" ")}`; } /** * Normalize text for phonemization * @param {string} text The text to normalize * @returns {string} The normalized text */ function normalize_text(text) { return ( text // 1. Handle quotes and brackets .replace(/[‘’]/g, "'") .replace(/«/g, "“") .replace(/»/g, "”") .replace(/[“”]/g, '"') .replace(/\(/g, "«") .replace(/\)/g, "»") // 2. Replace uncommon punctuation marks .replace(/、/g, ", ") .replace(/。/g, ". ") .replace(/!/g, "! ") .replace(/,/g, ", ") .replace(/:/g, ": ") .replace(/;/g, "; ") .replace(/?/g, "? ") // 3. Whitespace normalization .replace(/[^\S \n]/g, " ") .replace(/ +/, " ") .replace(/(?<=\n) +(?=\n)/g, "") // 4. Abbreviations .replace(/\bD[Rr]\.(?= [A-Z])/g, "Doctor") .replace(/\b(?:Mr\.|MR\.(?= [A-Z]))/g, "Mister") .replace(/\b(?:Ms\.|MS\.(?= [A-Z]))/g, "Miss") .replace(/\b(?:Mrs\.|MRS\.(?= [A-Z]))/g, "Mrs") .replace(/\betc\.(?! [A-Z])/gi, "etc") // 5. Normalize casual words .replace(/\b(y)eah?\b/gi, "$1e'a") // 5. Handle numbers and currencies .replace(/\d*\.\d+|\b\d{4}s?\b|(? m.replace(/\./g, "-")) .replace(/(?<=[A-Z])\.(?=[A-Z])/gi, "-") // 8. Strip leading and trailing whitespace .trim() ); } /** * Escapes regular expression special characters from a string by replacing them with their escaped counterparts. * * @param {string} string The string to escape. * @returns {string} The escaped string. */ function escapeRegExp(string) { return string.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); // $& means the whole matched string } const PUNCTUATION = ';:,.!?¡¿—…"«»“”(){}[]'; const PUNCTUATION_PATTERN = new RegExp(`(\\s*[${escapeRegExp(PUNCTUATION)}]+\\s*)+`, "g"); /** * Phonemize text using the eSpeak-NG phonemizer * @param {string} text The text to phonemize * @param {"a"|"b"} language The language to use * @param {boolean} norm Whether to normalize the text * @returns {Promise} The phonemized text */ export async function phonemize(text, language = "a", norm = true) { // 1. Normalize text if (norm) { text = normalize_text(text); } // 2. Split into chunks, to ensure we preserve punctuation const sections = split(text, PUNCTUATION_PATTERN); // 3. Convert each section to phonemes const lang = language === "a" ? "en-us" : "en"; const ps = (await Promise.all(sections.map(async ({ match, text }) => (match ? text : (await espeakng(text, lang)).join(" "))))).join(""); // 4. Post-process phonemes let processed = ps // https://en.wiktionary.org/wiki/kokoro#English .replace(/kəkˈoːɹoʊ/g, "kˈoʊkəɹoʊ") .replace(/kəkˈɔːɹəʊ/g, "kˈəʊkəɹəʊ") .replace(/ʲ/g, "j") .replace(/r/g, "ɹ") .replace(/x/g, "k") .replace(/ɬ/g, "l") .replace(/(?<=[a-zɹː])(?=hˈʌndɹɪd)/g, " ") .replace(/ z(?=[;:,.!?¡¿—…"«»“” ]|$)/g, "z"); // 5. Additional post-processing for American English if (language === "a") { processed = processed.replace(/(?<=nˈaɪn)ti(?!ː)/g, "di"); } return processed.trim(); }