Commit 033f82a9 authored by guobj
Browse files

init

2025/04/10 15:55:52
parent ef72564b
This diff is collapsed.
{
"name": "kokoro-web",
"private": true,
"version": "0.0.0",
"type": "module",
"scripts": {
"dev": "vite",
"build": "vite build",
"lint": "eslint .",
"preview": "vite preview"
},
"dependencies": {
"kokoro-js": "file:..",
"motion": "^11.12.0",
"react": "^18.3.1",
"react-dom": "^18.3.1"
},
"devDependencies": {
"@eslint/js": "^9.15.0",
"@types/react": "^18.3.12",
"@types/react-dom": "^18.3.1",
"@vitejs/plugin-react": "^4.3.4",
"autoprefixer": "^10.4.20",
"eslint": "^9.15.0",
"eslint-plugin-react": "^7.37.2",
"eslint-plugin-react-hooks": "^5.0.0",
"eslint-plugin-react-refresh": "^0.4.14",
"globals": "^15.12.0",
"postcss": "^8.4.49",
"tailwindcss": "^3.4.15",
"vite": "^6.0.1"
}
}
// PostCSS configuration: registers the two plugins listed below, each with
// empty (default) options.
export default {
  plugins: {
    tailwindcss: {},
    autoprefixer: {},
  },
};
This diff is collapsed.
<svg xmlns="http://www.w3.org/2000/svg" width="1600" height="198">
<defs>
<linearGradient id="a" x1="50%" x2="50%" y1="-10.959%" y2="100%">
<stop stop-color="#57BBC1" stop-opacity=".25" offset="0%"/>
<stop stop-color="#015871" offset="100%"/>
</linearGradient>
</defs>
<path fill="url(#a)" fill-rule="evenodd" d="M.005 121C311 121 409.898-.25 811 0c400 0 500 121 789 121v77H0s.005-48 .005-77z" transform="matrix(-1 0 0 1 1600 0)"/>
</svg>
import { useRef, useState, useEffect } from "react";
import { motion } from "motion/react";
export default function App() {
// Create a reference to the worker object.
const worker = useRef(null);
const [inputText, setInputText] = useState("Life is like a box of chocolates. You never know what you're gonna get.");
const [selectedSpeaker, setSelectedSpeaker] = useState("af_heart");
const [voices, setVoices] = useState([]);
const [status, setStatus] = useState(null);
const [error, setError] = useState(null);
const [loadingMessage, setLoadingMessage] = useState("Loading...");
const [results, setResults] = useState([]);
// We use the `useEffect` hook to setup the worker as soon as the `App` component is mounted.
useEffect(() => {
// Create the worker if it does not yet exist.
worker.current ??= new Worker(new URL("./worker.js", import.meta.url), {
type: "module",
});
// Create a callback function for messages from the worker thread.
const onMessageReceived = (e) => {
switch (e.data.status) {
case "device":
setLoadingMessage(`Loading model (device="${e.data.device}")`);
break;
case "ready":
setStatus("ready");
setVoices(e.data.voices);
break;
case "error":
setError(e.data.data);
break;
case "complete":
const { audio, text } = e.data;
// Generation complete: re-enable the "Generate" button
setResults((prev) => [{ text, src: audio }, ...prev]);
setStatus("ready");
break;
}
};
const onErrorReceived = (e) => {
console.error("Worker error:", e);
setError(e.message);
};
// Attach the callback function as an event listener.
worker.current.addEventListener("message", onMessageReceived);
worker.current.addEventListener("error", onErrorReceived);
// Define a cleanup function for when the component is unmounted.
return () => {
worker.current.removeEventListener("message", onMessageReceived);
worker.current.removeEventListener("error", onErrorReceived);
};
}, []);
const handleSubmit = (e) => {
e.preventDefault();
setStatus("running");
worker.current.postMessage({
type: "generate",
text: inputText.trim(),
voice: selectedSpeaker,
});
};
return (
<div className="relative w-full min-h-screen bg-gradient-to-br from-gray-900 to-gray-700 flex flex-col items-center justify-center p-4 relative overflow-hidden font-sans">
<motion.div initial={{ opacity: 1 }} animate={{ opacity: status === null ? 1 : 0 }} transition={{ duration: 0.5 }} className="absolute w-screen h-screen justify-center flex flex-col items-center z-10 bg-gray-800/95 backdrop-blur-md" style={{ pointerEvents: status === null ? "auto" : "none" }}>
<div className="w-[250px] h-[250px] border-4 border-white shadow-[0_0_0_5px_#4973ff] rounded-full overflow-hidden">
<div className="loading-wave"></div>
</div>
<p className={`text-3xl my-5 text-center ${error ? "text-red-500" : "text-white"}`}>{error ?? loadingMessage}</p>
</motion.div>
<div className="max-w-3xl w-full space-y-8 relative z-[2]">
<div className="text-center">
<h1 className="text-5xl font-extrabold text-gray-100 mb-2 drop-shadow-lg font-heading">Kokoro Text-to-Speech</h1>
<p className="text-2xl text-gray-300 font-semibold font-subheading">
Powered by&nbsp;
<a href="https://github.com/hexgrad/kokoro" target="_blank" rel="noreferrer" className="underline">
Kokoro
</a>
&nbsp;and&nbsp;
<a href="https://huggingface.co/docs/transformers.js" target="_blank" rel="noreferrer" className="underline">
<img width="40" src="hf-logo.svg" className="inline translate-y-[-2px] me-1"></img>Transformers.js
</a>
</p>
</div>
<div className="bg-gray-800/50 backdrop-blur-sm border border-gray-700 rounded-lg p-6">
<form onSubmit={handleSubmit} className="space-y-4">
<textarea placeholder="Enter text..." value={inputText} onChange={(e) => setInputText(e.target.value)} className="w-full min-h-[100px] max-h-[300px] bg-gray-700/50 backdrop-blur-sm border-2 border-gray-600 rounded-xl resize-y text-gray-100 placeholder-gray-400 px-3 py-2 focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-transparent" rows={Math.min(8, inputText.split("\n").length)} />
<div className="flex flex-col items-center space-y-4">
<select value={selectedSpeaker} onChange={(e) => setSelectedSpeaker(e.target.value)} className="w-full bg-gray-700/50 backdrop-blur-sm border-2 border-gray-600 rounded-xl text-gray-100 px-3 py-2 focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-transparent">
{Object.entries(voices).map(([id, voice]) => (
<option key={id} value={id}>
{voice.name} ({voice.language === "en-us" ? "American" : "British"} {voice.gender})
</option>
))}
</select>
<button type="submit" className="inline-flex justify-center items-center px-6 py-2 text-lg font-semibold bg-gradient-to-t from-blue-600 to-purple-600 hover:from-blue-700 hover:to-purple-700 transition-colors duration-300 rounded-xl text-white disabled:opacity-50" disabled={status === "running" || inputText.trim() === ""}>
{status === "running" ? "Generating..." : "Generate"}
</button>
</div>
</form>
</div>
{results.length > 0 && (
<motion.div initial={{ y: 50, opacity: 0 }} animate={{ y: 0, opacity: 1 }} transition={{ duration: 0.5 }} className="max-h-[250px] overflow-y-auto px-2 mt-4 space-y-6 relative z-[2]">
{results.map((result, i) => (
<div key={i}>
<div className="text-white bg-gray-800/70 backdrop-blur-sm border border-gray-700 rounded-lg p-4 z-10">
<span className="absolute right-5 font-bold">#{results.length - i}</span>
<p className="mb-3 max-w-[95%]">{result.text}</p>
<audio controls src={result.src} className="w-full">
Your browser does not support the audio element.
</audio>
</div>
</div>
))}
</motion.div>
)}
</div>
<div className="bg-[#015871] pointer-events-none absolute left-0 w-full h-[5%] bottom-[-50px]">
<div className="wave"></div>
<div className="wave"></div>
</div>
</div>
);
}
@tailwind base;
@tailwind components;
@tailwind utilities;
/*
* Wave animations adapted from the following two demos:
* - https://codepen.io/upasanaasopa/pen/poObEWZ
* - https://codepen.io/breakstorm00/pen/qBJZQNB
*/
*,
*:before,
*:after {
margin: 0;
padding: 0;
box-sizing: border-box;
}
/* Circular, blue-filled container for the loading animation. */
.loading-wave {
  position: relative;
  top: 0;
  width: 100%;
  height: 100%;
  background: #2c74b3;
  border-radius: 50%;
  box-shadow: inset 0 0 50px 0 rgba(0, 0, 0, 0.5);
}
/*
 * Shared geometry of the two rotating overlays: twice the size of the
 * container, centred horizontally and shifted upwards. The black background
 * set here is overridden by the per-pseudo-element rules that follow.
 */
.loading-wave:before,
.loading-wave:after {
  content: "";
  position: absolute;
  top: 0;
  left: 50%;
  width: 200%;
  height: 200%;
  background: black;
  transform: translate(-50%, -75%);
}
/* Opaque white overlay, one full rotation every 5s. */
.loading-wave:before {
  border-radius: 45%;
  background: rgba(255, 255, 255, 1);
  animation: animate 5s linear infinite;
}
/* Half-transparent white overlay, one full rotation every 10s. */
.loading-wave:after {
  border-radius: 40%;
  background: rgba(255, 255, 255, 0.5);
  animation: animate 10s linear infinite;
}
/*
 * Horizontally repeating wave strip. It is 6400px wide and the `wave`
 * keyframes shift it left by 1600px (the width declared in wave.svg) per cycle.
 */
.wave {
  background: url(/wave.svg) repeat-x;
  position: absolute;
  top: -198px;
  width: 6400px;
  height: 198px;
  animation: wave 7s cubic-bezier(0.36, 0.45, 0.63, 0.53) infinite;
  transform: translate3d(0, 0, 0);
}
/* Second wave: slightly lower, time-offset, and additionally bobbing vertically via `swell`. */
.wave:nth-of-type(2) {
  top: -175px;
  animation:
    wave 7s cubic-bezier(0.36, 0.45, 0.63, 0.53) -0.125s infinite,
    swell 7s ease -1.25s infinite;
  opacity: 1;
}
/* Horizontal scroll of a wave strip by 1600px. */
@keyframes wave {
  0% {
    margin-left: 0;
  }
  100% {
    margin-left: -1600px;
  }
}
/* Vertical bobbing between -25px and +5px. */
@keyframes swell {
  0%,
  100% {
    transform: translate3d(0, -25px, 0);
  }
  50% {
    transform: translate3d(0, 5px, 0);
  }
}
/*
 * Full rotation used by the .loading-wave overlays. The translate is repeated
 * in every keyframe because the animated `transform` replaces the static one.
 */
@keyframes animate {
  0% {
    transform: translate(-50%, -75%) rotate(0deg);
  }
  100% {
    transform: translate(-50%, -75%) rotate(360deg);
  }
}
import { StrictMode } from "react";
import { createRoot } from "react-dom/client";
import "./index.css";
import App from "./App.jsx";
// Mount the application into the element with id "root", wrapped in StrictMode.
createRoot(document.getElementById("root")).render(
  <StrictMode>
    <App />
  </StrictMode>,
);
/**
 * Reports whether a WebGPU adapter can be obtained in the current environment.
 *
 * Any failure (no `navigator`, no `navigator.gpu`, a rejected or throwing
 * `requestAdapter()`) is reported as "not available" rather than thrown.
 * @returns {Promise<boolean>} `true` only if `requestAdapter()` resolves to a truthy value.
 */
export async function detectWebGPU() {
  try {
    const gpu = globalThis.navigator?.gpu;
    if (!gpu) {
      return false;
    }
    return Boolean(await gpu.requestAdapter());
  } catch {
    return false;
  }
}
import { KokoroTTS } from "kokoro-js";
import { detectWebGPU } from "./utils.js";
// Worker script: detects the device, loads the model once, then answers
// generation requests from the main thread.
//
// Messages posted to the main thread all carry a `status` field:
//   "device"   -> { device }
//   "ready"    -> { voices, device }
//   "complete" -> { audio (object URL), text }
//   "error"    -> { error, data } (the same message under both keys; the UI reads `data`)

/**
 * Report a failure to the main thread.
 * @param {unknown} err The caught error.
 */
const postError = (err) => {
  const message = err?.message ?? String(err);
  // Sent under both keys: `error` is the original field name used by this
  // worker, `data` is the field the UI reads.
  self.postMessage({ status: "error", error: message, data: message });
};

// Device detection
const device = (await detectWebGPU()) ? "webgpu" : "wasm";
self.postMessage({ status: "device", device });
// Load the model
const model_id = "onnx-community/Kokoro-82M-v1.0-ONNX";
const tts = await KokoroTTS.from_pretrained(model_id, {
  // Quantized weights on WASM, full precision on WebGPU.
  dtype: device === "wasm" ? "q8" : "fp32",
  device,
}).catch((e) => {
  postError(e);
  throw e;
});
self.postMessage({ status: "ready", voices: tts.voices, device });
// Listen for messages from the main thread
self.addEventListener("message", async (e) => {
  const { text, voice } = e.data;
  try {
    // Generate speech
    const audio = await tts.generate(text, { voice });
    // Send the audio file back to the main thread
    const blob = audio.toBlob();
    self.postMessage({ status: "complete", audio: URL.createObjectURL(blob), text });
  } catch (err) {
    // Without this, a failed generation is an unhandled rejection inside the
    // worker and the main thread never hears back.
    postError(err);
  }
});
/** @type {import('tailwindcss').Config} */
export default {
content: ["./index.html", "./src/**/*.{js,ts,jsx,tsx}"],
theme: {
extend: {},
},
plugins: [],
};
import { defineConfig } from "vite";
import react from "@vitejs/plugin-react";
// https://vite.dev/config/
export default defineConfig({
  // Register the React plugin imported above.
  plugins: [react()],
  // Emit workers in ES module format.
  worker: { format: "es" },
  build: {
    target: "esnext",
  },
  // Only errors are logged when NODE_ENV is "development"; otherwise "info" level is used.
  logLevel: process.env.NODE_ENV === "development" ? "error" : "info",
});
This diff is collapsed.
{
"name": "kokoro-js",
"version": "1.2.0",
"type": "module",
"exports": {
"types": "./types/kokoro.d.ts",
"node": {
"import": "./dist/kokoro.js",
"require": "./dist/kokoro.cjs"
},
"default": "./dist/kokoro.web.js"
},
"scripts": {
"build": "rm -rf dist types && rollup -c && tsc && cp ../LICENSE LICENSE",
"format": "prettier --write . --print-width 1000",
"test": "vitest run"
},
"keywords": [
"kokoro",
"tts",
"text-to-speech"
],
"author": {
"name": "hexgrad",
"email": "hello@hexgrad.com"
},
"browser": {
"path": false,
"fs/promises": false
},
"contributors": [
"Xenova"
],
"license": "Apache-2.0",
"description": "High-quality text-to-speech for the web",
"dependencies": {
"@huggingface/transformers": "^3.3.3",
"phonemizer": "^1.2.1"
},
"devDependencies": {
"@rollup/plugin-node-resolve": "^16.0.0",
"@rollup/plugin-terser": "^0.4.4",
"prettier": "3.4.2",
"rollup": "^4.30.1",
"typescript": "^5.7.3",
"vitest": "^2.1.8"
},
"files": [
"types",
"dist",
"voices",
"README.md",
"LICENSE"
],
"homepage": "https://github.com/hexgrad/kokoro",
"repository": {
"type": "git",
"url": "git+https://github.com/hexgrad/kokoro.git"
},
"publishConfig": {
"access": "public"
},
"jsdelivr": "./dist/kokoro.web.js",
"unpkg": "./dist/kokoro.web.js"
}
import terser from "@rollup/plugin-terser";
import { nodeResolve } from "@rollup/plugin-node-resolve";
/**
 * Build the plugin list for one bundle.
 * @param {boolean} browser Forwarded to `nodeResolve` as its `browser` option.
 * @returns {import("rollup").Plugin[]} Module resolution followed by minification (comments stripped).
 */
function plugins(browser) {
  const resolver = nodeResolve({ browser });
  const minifier = terser({ format: { comments: false } });
  return [resolver, minifier];
}

// One entry per emitted bundle; the web bundle is recognised by its ".web.js" suffix.
const OUTPUT_CONFIGS = [
  // Node versions
  { file: "./dist/kokoro.cjs", format: "cjs" },
  { file: "./dist/kokoro.js", format: "esm" },
  // Web version
  { file: "./dist/kokoro.web.js", format: "esm" },
];

// Web bundle: suppress warnings whose message mentions @huggingface/transformers.
const WEB_SPECIFIC_CONFIG = {
  onwarn(warning, warn) {
    if (warning.message.includes("@huggingface/transformers")) {
      return;
    }
    warn(warning);
  },
};

// Node bundles: leave the runtime dependencies external.
const NODE_SPECIFIC_CONFIG = {
  external: ["@huggingface/transformers", "phonemizer"],
};

export default OUTPUT_CONFIGS.map((output) => {
  const isWebBundle = output.file.endsWith(".web.js");
  const targetSpecific = isWebBundle ? WEB_SPECIFIC_CONFIG : NODE_SPECIFIC_CONFIG;
  return {
    input: "./src/kokoro.js",
    output,
    plugins: plugins(isWebBundle),
    ...targetSpecific,
  };
});
import { StyleTextToSpeech2Model, AutoTokenizer, Tensor, RawAudio } from "@huggingface/transformers";
import { phonemize } from "./phonemize.js";
import { TextSplitterStream } from "./splitter.js";
import { getVoiceData, VOICES } from "./voices.js";
// Number of values in one voice style vector (the slice length taken from the voice data).
const STYLE_DIM = 256;
// Sampling rate passed to RawAudio for generated waveforms (presumably Hz; confirm against the model).
const SAMPLE_RATE = 24000;
/**
* @typedef {Object} GenerateOptions
* @property {keyof typeof VOICES} [voice="af_heart"] The voice
* @property {number} [speed=1] The speaking speed
*/
/**
* @typedef {Object} StreamProperties
* @property {RegExp} [split_pattern] The pattern to split the input text. If unset, the default sentence splitter will be used.
* @typedef {GenerateOptions & StreamProperties} StreamGenerateOptions
*/
/**
 * Text-to-speech front end: phonemizes text, tokenizes the phonemes, selects a
 * voice style vector and runs the model to obtain a waveform.
 */
export class KokoroTTS {
  /**
   * Create a new KokoroTTS instance.
   * @param {import('@huggingface/transformers').StyleTextToSpeech2Model} model The model
   * @param {import('@huggingface/transformers').PreTrainedTokenizer} tokenizer The tokenizer
   */
  constructor(model, tokenizer) {
    this.model = model;
    this.tokenizer = tokenizer;
  }

  /**
   * Load a KokoroTTS model from the Hugging Face Hub.
   * @param {string} model_id The model id
   * @param {Object} options Additional options
   * @param {"fp32"|"fp16"|"q8"|"q4"|"q4f16"} [options.dtype="fp32"] The data type to use.
   * @param {"wasm"|"webgpu"|"cpu"|null} [options.device=null] The device to run the model on.
   * @param {import("@huggingface/transformers").ProgressCallback} [options.progress_callback=null] A callback function that is called with progress information.
   * @returns {Promise<KokoroTTS>} The loaded model
   */
  static async from_pretrained(model_id, { dtype = "fp32", device = null, progress_callback = null } = {}) {
    // Start both downloads before awaiting so they proceed in parallel.
    const model = StyleTextToSpeech2Model.from_pretrained(model_id, { progress_callback, dtype, device });
    const tokenizer = AutoTokenizer.from_pretrained(model_id, { progress_callback });
    const info = await Promise.all([model, tokenizer]);
    return new KokoroTTS(...info);
  }

  /**
   * The table of available voices, keyed by voice id.
   */
  get voices() {
    return VOICES;
  }

  /**
   * Print the table of available voices to the console.
   */
  list_voices() {
    console.table(VOICES);
  }

  /**
   * Check that a voice id exists and derive its language code.
   * @param {string} voice The voice id
   * @returns {"a"|"b"} The first character of the voice id
   * @throws {Error} If the voice id is not a key of `VOICES`
   */
  _validate_voice(voice) {
    if (!Object.hasOwn(VOICES, voice)) {
      console.error(`Voice "${voice}" not found. Available voices:`);
      console.table(VOICES);
      throw new Error(`Voice "${voice}" not found. Should be one of: ${Object.keys(VOICES).join(", ")}.`);
    }
    const language = /** @type {"a"|"b"} */ (voice.at(0)); // "a" or "b"
    return language;
  }

  /**
   * Generate audio from text.
   *
   * @param {string} text The input text
   * @param {GenerateOptions} options Additional options
   * @returns {Promise<RawAudio>} The generated audio
   */
  async generate(text, { voice = "af_heart", speed = 1 } = {}) {
    const language = this._validate_voice(voice);
    const phonemes = await phonemize(text, language);
    const { input_ids } = this.tokenizer(phonemes, {
      truncation: true,
    });
    return this.generate_from_ids(input_ids, { voice, speed });
  }

  /**
   * Generate audio from input ids.
   * @param {Tensor} input_ids The input ids
   * @param {GenerateOptions} options Additional options
   * @returns {Promise<RawAudio>} The generated audio
   */
  async generate_from_ids(input_ids, { voice = "af_heart", speed = 1 } = {}) {
    // Select voice style based on number of input tokens
    // (two ids are discounted and the index is clamped to [0, 509]).
    const num_tokens = Math.min(Math.max(input_ids.dims.at(-1) - 2, 0), 509);
    // Load voice style
    const data = await getVoiceData(voice);
    const offset = num_tokens * STYLE_DIM;
    const voiceData = data.slice(offset, offset + STYLE_DIM);
    // Prepare model inputs
    const inputs = {
      input_ids,
      style: new Tensor("float32", voiceData, [1, STYLE_DIM]),
      speed: new Tensor("float32", [speed], [1]),
    };
    // Generate audio
    const { waveform } = await this.model(inputs);
    return new RawAudio(waveform.data, SAMPLE_RATE);
  }

  /**
   * Generate audio from text in a streaming fashion.
   *
   * When `text` is a string the whole input is known up front, so the internal
   * splitter is closed after the text has been pushed: the trailing sentence is
   * flushed and the generator terminates. When a `TextSplitterStream` is
   * passed, the caller owns it and must call `close()` to end the stream.
   * @param {string|TextSplitterStream} text The input text
   * @param {StreamGenerateOptions} options Additional options
   * @returns {AsyncGenerator<{text: string, phonemes: string, audio: RawAudio}, void, void>}
   */
  async *stream(text, { voice = "af_heart", speed = 1, split_pattern = null } = {}) {
    const language = this._validate_voice(voice);
    /** @type {TextSplitterStream} */
    let splitter;
    if (text instanceof TextSplitterStream) {
      splitter = text;
    } else if (typeof text === "string") {
      splitter = new TextSplitterStream();
      if (split_pattern) {
        const chunks = text
          .split(split_pattern)
          .map((chunk) => chunk.trim())
          .filter((chunk) => chunk.length > 0);
        for (const chunk of chunks) {
          splitter.push(chunk);
          // Chunks are appended to one buffer; flush so a chunk is never glued to the next one.
          splitter.flush();
        }
      } else {
        splitter.push(text);
      }
      // No more text will arrive: emit the remaining buffer and let the loop below finish.
      splitter.close();
    } else {
      throw new Error("Invalid input type. Expected string or TextSplitterStream.");
    }
    for await (const sentence of splitter) {
      const phonemes = await phonemize(sentence, language);
      const { input_ids } = this.tokenizer(phonemes, {
        truncation: true,
      });
      // TODO: There may be some cases where - even with splitting - the text is too long.
      // In that case, we should split the text into smaller chunks and process them separately.
      // For now, we just truncate these exceptionally long chunks
      const audio = await this.generate_from_ids(input_ids, { voice, speed });
      yield { text: sentence, phonemes, audio };
    }
  }
}
export { TextSplitterStream };
import { phonemize as espeakng } from "phonemizer";
/**
 * Split a string on a regex while keeping the delimiters in the output.
 * `String.prototype.split` drops delimiters, and wrapping the pattern in a
 * capturing group interferes with groups the pattern already contains, so the
 * pieces are rebuilt from the match positions instead.
 * @param {string} text The text to split.
 * @param {RegExp} regex The regex to split on (must be global, as required by `matchAll`).
 * @returns {{match: boolean; text: string}[]} The pieces in order; `match` tells whether a piece was matched by the regex. Empty pieces are omitted.
 */
function split(text, regex) {
  const pieces = [];
  // Append text[start, end) unless the range is empty.
  const addPiece = (isMatch, start, end) => {
    if (start < end) {
      pieces.push({ match: isMatch, text: text.slice(start, end) });
    }
  };
  let cursor = 0;
  for (const { 0: hit, index } of text.matchAll(regex)) {
    const hitEnd = index + hit.length;
    addPiece(false, cursor, index);
    addPiece(true, index, hitEnd);
    cursor = hitEnd;
  }
  addPiece(false, cursor, text.length);
  return pieces;
}
/**
 * Rewrite a matched number so that it is read out naturally.
 * - Decimals are returned untouched.
 * - Times ("h:mm") become "h o'clock", "h oh m" or "h m".
 * - Four-digit years (optionally followed by "s") are split into two pairs,
 *   e.g. "19 90s", "19 hundred", "19 oh 5"; values below 1100 or within the
 *   first ten years of a millennium are returned untouched.
 * @param {string} match The matched number
 * @returns {string} The phonetic equivalent
 */
function split_num(match) {
  if (match.includes(".")) {
    return match;
  }
  if (match.includes(":")) {
    const [hours, minutes] = match.split(":").map(Number);
    if (minutes === 0) {
      return `${hours} o'clock`;
    }
    return minutes < 10 ? `${hours} oh ${minutes}` : `${hours} ${minutes}`;
  }
  const year = parseInt(match.slice(0, 4), 10);
  const withinMillennium = year % 1000;
  if (year < 1100 || withinMillennium < 10) {
    return match;
  }
  const firstPair = match.slice(0, 2);
  const secondPair = parseInt(match.slice(2, 4), 10);
  const plural = match.endsWith("s") ? "s" : "";
  const hasHundreds = withinMillennium >= 100 && withinMillennium <= 999;
  if (hasHundreds && secondPair === 0) {
    return `${firstPair} hundred${plural}`;
  }
  if (hasHundreds && secondPair < 10) {
    return `${firstPair} oh ${secondPair}${plural}`;
  }
  return `${firstPair} ${secondPair}${plural}`;
}
/**
 * Spell out a matched monetary value with the currency name after the amount.
 * The first character selects the currency: "$" means dollars, anything else
 * is read as pounds.
 * @param {string} match The matched currency (symbol followed by the amount)
 * @returns {string} The formatted currency
 */
function flip_money(match) {
  const symbol = match[0];
  const amount = match.slice(1);
  const isDollar = symbol === "$";
  const bill = isDollar ? "dollar" : "pound";
  // Amounts such as "5 million" are not plain numbers: keep them verbatim, always plural.
  if (Number.isNaN(Number(amount))) {
    return `${amount} ${bill}s`;
  }
  if (!match.includes(".")) {
    const plural = amount === "1" ? "" : "s";
    return `${amount} ${bill}${plural}`;
  }
  const [whole, fraction] = amount.split(".");
  // "1.5" means 50 minor units, so pad on the right before parsing.
  const minor = parseInt(fraction.padEnd(2, "0"), 10);
  let coins;
  if (isDollar) {
    coins = minor === 1 ? "cent" : "cents";
  } else {
    coins = minor === 1 ? "penny" : "pence";
  }
  const wholePlural = whole === "1" ? "" : "s";
  return `${whole} ${bill}${wholePlural} and ${minor} ${coins}`;
}
/**
 * Spell out a decimal number: the integer part, the word "point", then the
 * fractional digits separated by spaces.
 * @param {string} match The matched number
 * @returns {string} The formatted number
 */
function point_num(match) {
  const parts = match.split(".");
  const integerPart = parts[0];
  const spacedDigits = parts[1].split("").join(" ");
  return `${integerPart} point ${spacedDigits}`;
}
/**
 * Normalize text for phonemization.
 *
 * Steps run in order: quote/bracket unification, full-width punctuation
 * replacement, whitespace collapsing, abbreviation expansion, number and
 * currency spelling, possessive and initialism handling, and trimming.
 * @param {string} text The text to normalize
 * @returns {string} The normalized text
 */
function normalize_text(text) {
  return (
    text
      // 1. Handle quotes and brackets
      .replace(/[‘’]/g, "'")
      // Genuine guillemets become quotes first, because "«" and "»" are reused below as stand-ins for parentheses.
      .replace(/«/g, "“")
      .replace(/»/g, "”")
      .replace(/[“”]/g, '"')
      .replace(/\(/g, "«")
      .replace(/\)/g, "»")
      // 2. Replace uncommon (CJK / full-width) punctuation marks with ASCII ones.
      // The patterns must stay full-width: their ASCII forms would insert spaces
      // into ordinary text (breaking times such as "12:30"), and /?/ is not a valid regex.
      .replace(/、/g, ", ")
      .replace(/。/g, ". ")
      .replace(/！/g, "! ")
      .replace(/，/g, ", ")
      .replace(/：/g, ": ")
      .replace(/；/g, "; ")
      .replace(/？/g, "? ")
      // 3. Whitespace normalization
      .replace(/[^\S \n]/g, " ")
      // Collapse every run of spaces (global flag), not only the first one.
      .replace(/ {2,}/g, " ")
      .replace(/(?<=\n) +(?=\n)/g, "")
      // 4. Abbreviations
      .replace(/\bD[Rr]\.(?= [A-Z])/g, "Doctor")
      .replace(/\b(?:Mr\.|MR\.(?= [A-Z]))/g, "Mister")
      .replace(/\b(?:Ms\.|MS\.(?= [A-Z]))/g, "Miss")
      .replace(/\b(?:Mrs\.|MRS\.(?= [A-Z]))/g, "Mrs")
      .replace(/\betc\.(?! [A-Z])/gi, "etc")
      // 5. Normalize casual words
      .replace(/\b(y)eah?\b/gi, "$1e'a")
      // 6. Handle numbers and currencies
      .replace(/\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)/g, split_num)
      .replace(/(?<=\d),(?=\d)/g, "")
      .replace(/[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b/gi, flip_money)
      .replace(/\d*\.\d+/g, point_num)
      .replace(/(?<=\d)-(?=\d)/g, " to ")
      .replace(/(?<=\d)S/g, " S")
      // 7. Handle possessives
      .replace(/(?<=[BCDFGHJ-NP-TV-Z])'?s\b/g, "'S")
      .replace(/(?<=X')S\b/g, "s")
      // 8. Handle hyphenated words/letters
      .replace(/(?:[A-Za-z]\.){2,} [a-z]/g, (m) => m.replace(/\./g, "-"))
      .replace(/(?<=[A-Z])\.(?=[A-Z])/gi, "-")
      // 9. Strip leading and trailing whitespace
      .trim()
  );
}
/**
 * Backslash-escape every character of a string that has a special meaning in
 * a regular expression, so the string can be embedded in a pattern literally.
 *
 * @param {string} string The string to escape.
 * @returns {string} The escaped string.
 */
function escapeRegExp(string) {
  const specialChars = /[.*+?^${}()|[\]\\]/g;
  return string.replace(specialChars, (char) => `\\${char}`);
}
// Characters treated as punctuation when chunking text for phonemization.
const PUNCTUATION = ';:,.!?¡¿—…"«»“”(){}[]';
// Matches one or more runs of punctuation together with any whitespace around them.
const PUNCTUATION_PATTERN = new RegExp(`(\\s*[${escapeRegExp(PUNCTUATION)}]+\\s*)+`, "g");
/**
 * Convert text to phonemes with the eSpeak-NG phonemizer.
 *
 * Punctuation runs are passed through unchanged; only the text between them is
 * phonemized. The result is then patched with a few fixed substitutions.
 * @param {string} text The text to phonemize
 * @param {"a"|"b"} language The language to use ("a" selects "en-us", anything else "en")
 * @param {boolean} norm Whether to normalize the text
 * @returns {Promise<string>} The phonemized text
 */
export async function phonemize(text, language = "a", norm = true) {
  // 1. Normalize text
  const prepared = norm ? normalize_text(text) : text;

  // 2. Split into chunks, to ensure we preserve punctuation
  const sections = split(prepared, PUNCTUATION_PATTERN);

  // 3. Convert each section to phonemes
  const lang = language === "a" ? "en-us" : "en";
  const rendered = await Promise.all(
    sections.map(async (section) => {
      if (section.match) {
        return section.text;
      }
      const phonemes = await espeakng(section.text, lang);
      return phonemes.join(" ");
    }),
  );

  // 4. Post-process phonemes
  let processed = rendered
    .join("")
    // https://en.wiktionary.org/wiki/kokoro#English
    .replace(/kəkˈoːɹoʊ/g, "kˈoʊkəɹoʊ")
    .replace(/kəkˈɔːɹəʊ/g, "kˈəʊkəɹəʊ")
    .replace(/ʲ/g, "j")
    .replace(/r/g, "ɹ")
    .replace(/x/g, "k")
    .replace(/ɬ/g, "l")
    .replace(/(?<=[a-zɹː])(?=hˈʌndɹɪd)/g, " ")
    .replace(/ z(?=[;:,.!?¡¿—…"«»“” ]|$)/g, "z");

  // 5. Additional post-processing for American English
  if (language === "a") {
    processed = processed.replace(/(?<=nˈaɪn)ti(?!ː)/g, "di");
  }
  return processed.trim();
}
/**
 * Returns true if the character is considered a sentence terminator.
 * This includes ASCII (".", "!", "?") and common Unicode terminators
 * ("…", "。", "？", "！").
 * NOTE: We also include newlines here, as this is favourable for text-to-speech systems.
 *
 * Only single characters qualify: the empty string and multi-character strings
 * are rejected explicitly, because `String.prototype.includes` would otherwise
 * report `""` (and substrings such as ".!") as terminators.
 * @param {string} c The character to test.
 * @param {boolean} [includeNewlines=true] Whether to treat newlines as terminators.
 * @returns {boolean}
 */
function isSentenceTerminator(c, includeNewlines = true) {
  if (typeof c !== "string" || c.length !== 1) {
    return false;
  }
  return ".!?…。？！".includes(c) || (includeNewlines && c === "\n");
}
/**
 * Returns true if the character should be attached to the sentence terminator,
 * such as closing quotes or brackets.
 *
 * Only single characters qualify: `String.prototype.includes` alone would also
 * accept the empty string and multi-character substrings.
 * @param {string} c The character to test.
 * @returns {boolean}
 */
function isTrailingChar(c) {
  if (typeof c !== "string" || c.length !== 1) {
    return false;
  }
  return "\"')]}」』".includes(c);
}
/**
 * Reads the token that begins at `start`: the run of consecutive
 * non-whitespace characters up to the next whitespace character or the end of
 * the buffer.
 * @param {string} buffer The input text.
 * @param {number} start The starting index.
 * @returns {string} The extracted token (empty if `start` points at whitespace).
 */
function getTokenFromBuffer(buffer, start) {
  const whitespace = /\s/;
  let stop = start;
  for (; stop < buffer.length; ++stop) {
    if (whitespace.test(buffer[stop])) {
      break;
    }
  }
  return buffer.substring(start, stop);
}
// Common abbreviations, lower-case and without the trailing period. Strings of
// single letters joined by periods (e.g., "i.e", "e.g", "u.s.a", "u.s") are
// not listed here because they are handled separately.
const ABBREVIATIONS = new Set([
  // Titles and ranks
  "mr", "mrs", "ms", "dr", "prof", "sr", "jr", "sgt", "col", "gen", "rep", "sen", "gov", "lt", "maj", "capt",
  // Places, organisations and miscellaneous
  "st", "mt", "etc", "co", "inc", "ltd", "dept", "vs", "p", "pg",
  // Months
  "jan", "feb", "mar", "apr", "jun", "jul", "aug", "sep", "sept", "oct", "nov", "dec",
  // Days of the week
  "sun", "mon", "tu", "tue", "tues", "wed", "th", "thu", "thur", "thurs", "fri", "sat",
]);

/**
 * Tells whether a token is one of the known abbreviations, ignoring case,
 * a possessive ending ('s or ’s) and any trailing periods.
 * @param {string} token The token to check.
 * @returns {boolean}
 */
function isAbbreviation(token) {
  const withoutPossessive = token.replace(/['’]s$/i, "");
  const withoutPeriods = withoutPossessive.replace(/\.+$/, "");
  return ABBREVIATIONS.has(withoutPeriods.toLowerCase());
}
// Map of closing punctuation to their corresponding opening punctuation.
// Every key must be a distinct, non-empty character: empty-string entries
// collapse into one dead key and never match a character of the buffer.
const MATCHING = new Map([
  [")", "("],
  ["]", "["],
  ["}", "{"],
  ["》", "《"],
  ["〉", "〈"],
  ["›", "‹"],
  ["»", "«"],
  ["）", "（"],
  ["」", "「"],
  ["』", "『"],
  ["〕", "〔"],
  ["】", "【"],
]);
// Set of opening punctuation characters.
const OPENING = new Set(MATCHING.values());
/**
 * Updates the nesting stack to track quotes and paired punctuation.
 * This supports standard quotes and brackets (", ', (), [], {}), guillemets
 * and CJK brackets/quotes such as 「」 and 『』.
 * (An apostrophe between letters is ignored so that contractions remain intact.)
 *
 * The stack is mutated in place. A closing character only pops when it matches
 * the opener on top of the stack; otherwise it is ignored.
 * @param {string} c The current character.
 * @param {string[]} stack The current nesting stack.
 * @param {number} i The index of the character in the buffer.
 * @param {string} buffer The full text being processed.
 */
function updateStack(c, stack, i, buffer) {
  // Handle standard quotes.
  if (c === '"' || c === "'") {
    // Ignore an apostrophe if it's between letters (e.g., in contractions).
    if (c === "'" && i > 0 && i < buffer.length - 1 && /[A-Za-z]/.test(buffer[i - 1]) && /[A-Za-z]/.test(buffer[i + 1])) {
      return;
    }
    // The same character opens and closes a quote: pop if it is on top, push otherwise.
    if (stack.length && stack.at(-1) === c) {
      stack.pop();
    } else {
      stack.push(c);
    }
    return;
  }
  // Handle opening punctuation.
  if (OPENING.has(c)) {
    stack.push(c);
    return;
  }
  // Handle closing punctuation.
  const expectedOpening = MATCHING.get(c);
  if (expectedOpening && stack.length && stack.at(-1) === expectedOpening) {
    stack.pop();
  }
}
/**
* A simple stream-based text splitter that emits complete sentences.
*/
export class TextSplitterStream {
constructor() {
this._buffer = "";
this._sentences = [];
this._resolver = null;
this._closed = false;
}
/**
* Push one or more text chunks into the stream.
* @param {...string} texts Text fragments to process.
*/
push(...texts) {
for (const txt of texts) {
this._buffer += txt;
this._process();
}
}
/**
* Closes the stream, signaling that no more text will be pushed.
* This will flush any remaining text in the buffer as a sentence
* and allow the consuming process to finish processing the stream.
*/
close() {
if (this._closed) {
throw new Error("Stream is already closed.");
}
this._closed = true;
this.flush();
}
/**
* Flushes any remaining text in the buffer as a sentence.
*/
flush() {
const remainder = this._buffer.trim();
if (remainder.length > 0) {
this._sentences.push(remainder);
}
this._buffer = "";
this._resolve();
}
/**
* Resolve the pending promise to signal that sentences are available.
* @private
*/
_resolve() {
if (this._resolver) {
this._resolver();
this._resolver = null;
}
}
/**
* Processes the internal buffer to extract complete sentences.
* If the potential sentence boundary is at the end of the current buffer,
* it waits for more text before splitting.
* @private
*/
_process() {
let sentenceStart = 0;
const buffer = this._buffer;
const len = buffer.length;
let i = 0;
let stack = [];
// Helper to scan from the current index over trailing terminators and punctuation.
const scanBoundary = (idx) => {
let end = idx;
// Consume contiguous sentence terminators (excluding newlines).
while (end + 1 < len && isSentenceTerminator(buffer[end + 1], false)) {
++end;
}
// Consume trailing characters (e.g., closing quotes/brackets).
while (end + 1 < len && isTrailingChar(buffer[end + 1])) {
++end;
}
let nextNonSpace = end + 1;
while (nextNonSpace < len && /\s/.test(buffer[nextNonSpace])) {
++nextNonSpace;
}
return { end, nextNonSpace };
};
while (i < len) {
const c = buffer[i];
updateStack(c, stack, i, buffer);
// Only consider splitting if we're not inside any nested structure.
if (stack.length === 0 && isSentenceTerminator(c)) {
const currentSegment = buffer.slice(sentenceStart, i);
// Skip splitting for likely numbered lists (e.g., "1." or "\n2.").
if (/(^|\n)\d+$/.test(currentSegment)) {
++i;
continue;
}
const { end: boundaryEnd, nextNonSpace } = scanBoundary(i);
// If the terminator is not a newline and there's no extra whitespace,
// we might be in the middle of a token (e.g., "$9.99"), so skip splitting.
if (i === nextNonSpace - 1 && c !== "\n") {
++i;
continue;
}
// Wait for more text if there's no non-whitespace character yet.
if (nextNonSpace === len) {
break;
}
// Determine the token immediately preceding the terminator.
let tokenStart = i - 1;
while (tokenStart >= 0 && /\S/.test(buffer[tokenStart])) {
tokenStart--;
}
tokenStart = Math.max(sentenceStart, tokenStart + 1);
const token = getTokenFromBuffer(buffer, tokenStart);
if (!token) {
++i;
continue;
}
// --- URL/email protection ---
// If the token appears to be a URL or email (contains "://" or "@")
// and does not already end with a terminator, skip splitting.
if ((/https?[,:]\/\//.test(token) || token.includes("@")) && !isSentenceTerminator(token.at(-1))) {
i = tokenStart + token.length;
continue;
}
// --- Abbreviation protection ---
if (isAbbreviation(token)) {
++i;
continue;
}
// --- Middle initials heuristic ---
// If the token is a series of single-letter initials (each ending in a period)
// and is followed by a capitalized word, assume it's part of a name.
if (/^([A-Za-z]\.)+$/.test(token) && nextNonSpace < len && /[A-Z]/.test(buffer[nextNonSpace])) {
++i;
continue;
}
// --- Lookahead heuristic ---
// If the terminator is a period and the next non–whitespace character is lowercase,
// assume it is not the end of a sentence.
if (c === "." && nextNonSpace < len && /[a-z]/.test(buffer[nextNonSpace])) {
++i;
continue;
}
// Special case: ellipsis that stands alone should be merged with the following sentence.
const sentence = buffer.substring(sentenceStart, boundaryEnd + 1).trim();
if (sentence === "..." || sentence === "") {
++i;
continue;
}
// Accept the sentence boundary.
if (sentence) {
this._sentences.push(sentence);
}
// Move to the next sentence.
i = sentenceStart = boundaryEnd + 1;
continue;
}
++i;
}
// Remove the processed portion of the buffer.
this._buffer = buffer.substring(sentenceStart);
// Resolve any pending promise if sentences are available.
if (this._sentences.length > 0) {
this._resolve();
}
}
/**
* Async iterator to yield sentences as they become available.
* @returns {AsyncGenerator<string, void, void>}
*/
async *[Symbol.asyncIterator]() {
if (this._resolver) {
throw new Error("Another iterator is already active.");
}
while (true) {
if (this._sentences.length > 0) {
yield this._sentences.shift();
} else if (this._closed) {
// No more text will be pushed.
break;
} else {
// Wait for more text.
await new Promise((resolve) => {
this._resolver = resolve;
});
}
}
}
/**
* Synchronous iterator that flushes the buffer and returns all sentences.
* @returns {Iterator<string>}
*/
[Symbol.iterator]() {
this.flush();
const iterator = this._sentences[Symbol.iterator]();
this._sentences = [];
return iterator;
}
/**
 * The sentences that are currently queued on this instance.
 *
 * This returns the internal `_sentences` array itself, not a copy, so any
 * mutation made by the caller (push, shift, splice, ...) changes the queue.
 * No setter is defined alongside this getter.
 * @type {string[]} The array of sentences.
 * @readonly
 */
get sentences() {
return this._sentences;
}
}
/**
 * Convenience wrapper: feeds `text` to a fresh `TextSplitterStream` and
 * collects everything its synchronous iterator produces.
 * @param {string} text The text to split.
 * @returns {string[]} An array of sentences.
 */
export function split(text) {
  const stream = new TextSplitterStream();
  stream.push(text);
  return Array.from(stream);
}
import path from "path";
import fs from "fs/promises";
/**
 * Catalogue of the voices this module knows about, keyed by voice id.
 *
 * Each entry carries a display `name`, a `language` tag, a `gender`, an
 * optional `traits` emoji, and two letter grades (`targetQuality` and
 * `overallGrade`). In the active entries the first letter of the id tracks the
 * language (`a` -> "en-us", `b` -> "en-gb") and the second letter tracks the
 * gender (`f` -> "Female", `m` -> "Male").
 *
 * The id doubles as a file stem: `getVoiceFile` loads `<id>.bin`.
 *
 * Note: `Object.freeze` is shallow. Keys cannot be added to or removed from
 * the catalogue, but the per-voice objects themselves are not frozen.
 */
export const VOICES = Object.freeze({
af_heart: {
name: "Heart",
language: "en-us",
gender: "Female",
traits: "❤️",
targetQuality: "A",
overallGrade: "A",
},
af_alloy: {
name: "Alloy",
language: "en-us",
gender: "Female",
targetQuality: "B",
overallGrade: "C",
},
af_aoede: {
name: "Aoede",
language: "en-us",
gender: "Female",
targetQuality: "B",
overallGrade: "C+",
},
af_bella: {
name: "Bella",
language: "en-us",
gender: "Female",
traits: "🔥",
targetQuality: "A",
overallGrade: "A-",
},
af_jessica: {
name: "Jessica",
language: "en-us",
gender: "Female",
targetQuality: "C",
overallGrade: "D",
},
af_kore: {
name: "Kore",
language: "en-us",
gender: "Female",
targetQuality: "B",
overallGrade: "C+",
},
af_nicole: {
name: "Nicole",
language: "en-us",
gender: "Female",
traits: "🎧",
targetQuality: "B",
overallGrade: "B-",
},
af_nova: {
name: "Nova",
language: "en-us",
gender: "Female",
targetQuality: "B",
overallGrade: "C",
},
af_river: {
name: "River",
language: "en-us",
gender: "Female",
targetQuality: "C",
overallGrade: "D",
},
af_sarah: {
name: "Sarah",
language: "en-us",
gender: "Female",
targetQuality: "B",
overallGrade: "C+",
},
af_sky: {
name: "Sky",
language: "en-us",
gender: "Female",
targetQuality: "B",
overallGrade: "C-",
},
am_adam: {
name: "Adam",
language: "en-us",
gender: "Male",
targetQuality: "D",
overallGrade: "F+",
},
am_echo: {
name: "Echo",
language: "en-us",
gender: "Male",
targetQuality: "C",
overallGrade: "D",
},
am_eric: {
name: "Eric",
language: "en-us",
gender: "Male",
targetQuality: "C",
overallGrade: "D",
},
am_fenrir: {
name: "Fenrir",
language: "en-us",
gender: "Male",
targetQuality: "B",
overallGrade: "C+",
},
am_liam: {
name: "Liam",
language: "en-us",
gender: "Male",
targetQuality: "C",
overallGrade: "D",
},
am_michael: {
name: "Michael",
language: "en-us",
gender: "Male",
targetQuality: "B",
overallGrade: "C+",
},
am_onyx: {
name: "Onyx",
language: "en-us",
gender: "Male",
targetQuality: "C",
overallGrade: "D",
},
am_puck: {
name: "Puck",
language: "en-us",
gender: "Male",
targetQuality: "B",
overallGrade: "C+",
},
am_santa: {
name: "Santa",
language: "en-us",
gender: "Male",
targetQuality: "C",
overallGrade: "D-",
},
bf_emma: {
name: "Emma",
language: "en-gb",
gender: "Female",
traits: "🚺",
targetQuality: "B",
overallGrade: "B-",
},
bf_isabella: {
name: "Isabella",
language: "en-gb",
gender: "Female",
targetQuality: "B",
overallGrade: "C",
},
bm_george: {
name: "George",
language: "en-gb",
gender: "Male",
targetQuality: "B",
overallGrade: "C",
},
bm_lewis: {
name: "Lewis",
language: "en-gb",
gender: "Male",
targetQuality: "C",
overallGrade: "D+",
},
bf_alice: {
name: "Alice",
language: "en-gb",
gender: "Female",
traits: "🚺",
targetQuality: "C",
overallGrade: "D",
},
bf_lily: {
name: "Lily",
language: "en-gb",
gender: "Female",
traits: "🚺",
targetQuality: "C",
overallGrade: "D",
},
bm_daniel: {
name: "Daniel",
language: "en-gb",
gender: "Male",
traits: "🚹",
targetQuality: "C",
overallGrade: "D",
},
bm_fable: {
name: "Fable",
language: "en-gb",
gender: "Male",
traits: "🚹",
targetQuality: "B",
overallGrade: "C",
},
// TODO: Add support for other languages. The entries below are kept
// commented out until then, so they are not part of the exported catalogue.
// jf_alpha: {
// name: "alpha",
// language: "ja",
// gender: "Female",
// traits: "🚺",
// targetQuality: "B",
// overallGrade: "C+",
// },
// jf_gongitsune: {
// name: "gongitsune",
// language: "ja",
// gender: "Female",
// traits: "🚺",
// targetQuality: "B",
// overallGrade: "C",
// },
// jf_nezumi: {
// name: "nezumi",
// language: "ja",
// gender: "Female",
// traits: "🚺",
// targetQuality: "B",
// overallGrade: "C-",
// },
// jf_tebukuro: {
// name: "tebukuro",
// language: "ja",
// gender: "Female",
// traits: "🚺",
// targetQuality: "B",
// overallGrade: "C",
// },
// jm_kumo: {
// name: "kumo",
// language: "ja",
// gender: "Male",
// traits: "🚹",
// targetQuality: "B",
// overallGrade: "C-",
// },
// zf_xiaobei: {
// name: "xiaobei",
// language: "zh",
// gender: "Female",
// traits: "🚺",
// targetQuality: "C",
// overallGrade: "D",
// },
// zf_xiaoni: {
// name: "xiaoni",
// language: "zh",
// gender: "Female",
// traits: "🚺",
// targetQuality: "C",
// overallGrade: "D",
// },
// zf_xiaoxiao: {
// name: "xiaoxiao",
// language: "zh",
// gender: "Female",
// traits: "🚺",
// targetQuality: "C",
// overallGrade: "D",
// },
// zf_xiaoyi: {
// name: "xiaoyi",
// language: "zh",
// gender: "Female",
// traits: "🚺",
// targetQuality: "C",
// overallGrade: "D",
// },
// zm_yunjian: {
// name: "yunjian",
// language: "zh",
// gender: "Male",
// traits: "🚹",
// targetQuality: "C",
// overallGrade: "D",
// },
// zm_yunxi: {
// name: "yunxi",
// language: "zh",
// gender: "Male",
// traits: "🚹",
// targetQuality: "C",
// overallGrade: "D",
// },
// zm_yunxia: {
// name: "yunxia",
// language: "zh",
// gender: "Male",
// traits: "🚹",
// targetQuality: "C",
// overallGrade: "D",
// },
// zm_yunyang: {
// name: "yunyang",
// language: "zh",
// gender: "Male",
// traits: "🚹",
// targetQuality: "C",
// overallGrade: "D",
// },
// ef_dora: {
// name: "dora",
// language: "es",
// gender: "Female",
// traits: "🚺",
// targetQuality: "C",
// overallGrade: "D",
// },
// em_alex: {
// name: "alex",
// language: "es",
// gender: "Male",
// traits: "🚹",
// targetQuality: "C",
// overallGrade: "D",
// },
// em_santa: {
// name: "santa",
// language: "es",
// gender: "Male",
// traits: "🚹",
// targetQuality: "C",
// overallGrade: "D",
// },
// ff_siwis: {
// name: "siwis",
// language: "fr", // NOTE(review): was "es", which clashes with the "ef_"/"em_" Spanish ids; presumably French - confirm before enabling
// gender: "Female",
// traits: "🚺",
// targetQuality: "B",
// overallGrade: "B-",
// },
// hf_alpha: {
// name: "alpha",
// language: "hi",
// gender: "Female",
// traits: "🚺",
// targetQuality: "B",
// overallGrade: "C",
// },
// hf_beta: {
// name: "beta",
// language: "hi",
// gender: "Female",
// traits: "🚺",
// targetQuality: "B",
// overallGrade: "C",
// },
// hm_omega: {
// name: "omega",
// language: "hi",
// gender: "Male",
// traits: "🚹",
// targetQuality: "B",
// overallGrade: "C",
// },
// hm_psi: {
// name: "psi",
// language: "hi",
// gender: "Male",
// traits: "🚹",
// targetQuality: "B",
// overallGrade: "C",
// },
// if_sara: {
// name: "sara",
// language: "it",
// gender: "Female",
// traits: "🚺",
// targetQuality: "B",
// overallGrade: "C",
// },
// im_nicola: {
// name: "nicola",
// language: "it",
// gender: "Male",
// traits: "🚹",
// targetQuality: "B",
// overallGrade: "C",
// },
// pf_dora: {
// name: "dora",
// language: "pt-br",
// gender: "Female",
// traits: "🚺",
// targetQuality: "C",
// overallGrade: "D",
// },
// pm_alex: {
// name: "alex",
// language: "pt-br",
// gender: "Male",
// traits: "🚹",
// targetQuality: "C",
// overallGrade: "D",
// },
// pm_santa: {
// name: "santa",
// language: "pt-br",
// gender: "Male",
// traits: "🚹",
// targetQuality: "C",
// overallGrade: "D",
// },
});
/** Base URL of the hosted voice files; `/<id>.bin` is appended per voice. */
const VOICE_DATA_URL = "https://huggingface.co/onnx-community/Kokoro-82M-v1.0-ONNX/resolve/main/voices";

/**
 * Loads the raw bytes of a voice file.
 *
 * - When `fs.readFile` is available, reads `../voices/<id>.bin` resolved
 *   against this module's directory.
 * - Otherwise downloads `<VOICE_DATA_URL>/<id>.bin`, consulting and then
 *   populating the "kokoro-voices" Cache Storage entry on a best-effort
 *   basis (cache failures are logged with `console.warn` and ignored).
 *
 * @param {keyof typeof VOICES} id
 * @returns {Promise<ArrayBufferLike>}
 * @throws {Error} If the download completes with a non-OK HTTP status.
 */
async function getVoiceFile(id) {
  if (fs?.readFile) {
    const dirname = typeof __dirname !== "undefined" ? __dirname : import.meta.dirname;
    const file = path.resolve(dirname, `../voices/${id}.bin`);
    const { buffer, byteOffset, byteLength } = await fs.readFile(file);
    // A Buffer may be a window onto a larger ArrayBuffer. Only hand back the
    // backing store directly when it matches the file contents exactly;
    // otherwise copy out just the bytes that belong to the file.
    if (byteOffset === 0 && byteLength === buffer.byteLength) {
      return buffer;
    }
    return buffer.slice(byteOffset, byteOffset + byteLength);
  }

  const url = `${VOICE_DATA_URL}/${id}.bin`;

  let cache;
  try {
    cache = await caches.open("kokoro-voices");
    const cachedResponse = await cache.match(url);
    if (cachedResponse) {
      return await cachedResponse.arrayBuffer();
    }
  } catch (e) {
    console.warn("Unable to open cache", e);
  }

  // No cache, or cache failed to open. Fetch the file.
  const response = await fetch(url);
  if (!response.ok) {
    // Without this check an error page would be returned as voice data and,
    // worse, stored in the cache below under a default 200 status.
    throw new Error(`Failed to fetch voice "${id}" from ${url}: ${response.status} ${response.statusText}`);
  }
  const buffer = await response.arrayBuffer();

  if (cache) {
    try {
      // NOTE: We use `new Response(buffer, ...)` instead of `response.clone()` to handle LFS files
      await cache.put(
        url,
        new Response(buffer, {
          headers: response.headers,
        }),
      );
    } catch (e) {
      console.warn("Unable to cache file", e);
    }
  }
  return buffer;
}
/**
 * Per-voice cache for `getVoiceData`, keyed by voice id. It stores the promise
 * of the data rather than the data itself, so callers that arrive while a load
 * is still in flight share that load instead of starting another one.
 * @type {Map<string, Promise<Float32Array>>}
 */
const VOICE_CACHE = new Map();

/**
 * Returns the contents of a voice file viewed as 32-bit floats, loading each
 * voice at most once for as long as the load succeeds.
 * @param {string} voice Voice id, passed through to `getVoiceFile`.
 * @returns {Promise<Float32Array>} The same array instance on every call for a given voice.
 */
export async function getVoiceData(voice) {
  const cached = VOICE_CACHE.get(voice);
  if (cached !== undefined) {
    return cached;
  }

  const pending = (async () => new Float32Array(await getVoiceFile(voice)))();
  VOICE_CACHE.set(voice, pending);

  // A failed load must not poison the cache: drop the entry so the next call
  // retries. The caller still observes the rejection through `pending`.
  pending.catch(() => {
    if (VOICE_CACHE.get(voice) === pending) {
      VOICE_CACHE.delete(voice);
    }
  });

  return pending;
}
import { describe, test, expect } from "vitest";
import { phonemize } from "../src/phonemize.js";
/**
 * Fixtures for the "en-us" suite: input text -> expected phoneme string.
 * Keys must be unique; a repeated key silently replaces the earlier entry
 * in the Map and that case never runs.
 */
const A_TEST_CASES = new Map([
  // Curly single quotes
  ["‘Hello’", "həlˈoʊ"],
  ["‘Test’ and ‘Example’", "tˈɛst ænd ɛɡzˈæmpəl"],

  // Guillemets
  ["«Bonjour»", '"bɔːnʒˈʊɹ"'],
  ["«Test «nested» quotes»", '"tˈɛst "nˈɛstᵻd" kwˈoʊts"'],

  // Parentheses
  ["(Hello)", "«həlˈoʊ»"],
  ["(Nested (Parentheses))", "«nˈɛstᵻd «pɚɹˈɛnθəsˌiːz»»"],

  // Japanese / Chinese characters and punctuation
  ["こんにちは、世界!", "dʒˈæpəniːzlˌɛɾɚ dʒˈæpəniːzlˌɛɾɚ dʒˈæpəniːzlˌɛɾɚ dʒˈæpəniːzlˌɛɾɚ dʒˈæpəniːzlˌɛɾɚ, tʃˈaɪniːzlˌɛɾɚ tʃˈaɪniːzlˌɛɾɚ!"],
  ["これはテストです:はい?", "dʒˈæpəniːzlˌɛɾɚ dʒˈæpəniːzlˌɛɾɚ dʒˈæpəniːzlˌɛɾɚ dʒˈæpəniːzlˌɛɾɚ dʒˈæpəniːzlˌɛɾɚ dʒˈæpəniːzlˌɛɾɚ dʒˈæpəniːzlˌɛɾɚ dʒˈæpəniːzlˌɛɾɚ: dʒˈæpəniːzlˌɛɾɚ dʒˈæpəniːzlˌɛɾɚ?"],

  // Whitespace
  ["Hello World", "həlˈoʊ wˈɜːld"],
  // NOTE(review): this key used to be byte-identical to the entry above, so
  // the Map kept only one of the two. Restored as a run of three spaces
  // (written with an escape so whitespace-collapsing tools cannot merge it
  // again) - confirm against the intended fixture.
  ["Hello \u0020 World", "həlˈoʊ wˈɜːld"],
  ["Hello\n \nWorld", "həlˈoʊ wˈɜːld"],

  // Titles and abbreviations
  ["Dr. Smith", "dˈɑːktɚ smˈɪθ"],
  ["DR. Brown", "dˈɑːktɚ bɹˈaʊn"],
  ["Mr. Smith", "mˈɪstɚ smˈɪθ"],
  ["MR. Anderson", "mˈɪstɚɹ ˈændɚsən"],
  ["Ms. Taylor", "mˈɪs tˈeɪlɚ"],
  ["MS. Carter", "mˈɪs kˈɑːɹɾɚ"],
  ["Mrs. Johnson", "mˈɪsɪz dʒˈɑːnsən"],
  ["MRS. Wilson", "mˈɪsɪz wˈɪlsən"],
  ["Apples, oranges, etc.", "ˈæpəlz, ˈɔɹɪndʒᵻz, ɛtsˈɛtɹə"],
  ["Apples, etc. Pears.", "ˈæpəlz, ɛtsˈɛtɹə. pˈɛɹz."],

  // Informal words
  ["Yeah", "jˈɛə"],
  ["yeah", "jˈɛə"],

  // Numbers, times, currency, ranges
  ["1990", "nˈaɪntiːn nˈaɪndi"],
  ["12:34", "twˈɛlv θˈɜːɾi fˈoːɹ"],
  ["2022s", "twˈɛnti twˈɛnti tˈuːz"],
  ["1,000", "wˈʌn θˈaʊzənd"],
  ["12,345,678", "twˈɛlv mˈɪliən θɹˈiː hˈʌndɹɪd fˈoːɹɾi fˈaɪv θˈaʊzənd sˈɪks hˈʌndɹɪd sˈɛvənti ˈeɪt"],
  ["$100", "wˈʌn hˈʌndɹɪd dˈɑːlɚz"],
  ["£1.50", "wˈʌn pˈaʊnd ænd fˈɪfti pˈɛns"],
  ["12.34", "twˈɛlv pˈɔɪnt θɹˈiː fˈoːɹ"],
  ["0.01", "zˈiəɹoʊ pˈɔɪnt zˈiəɹoʊ wˈʌn"],
  ["10-20", "tˈɛn tə twˈɛnti"],
  ["5-10", "fˈaɪv tə tˈɛn"],
  ["10S", "tˈɛn ˈɛs"],
  ["5S", "fˈaɪv ˈɛs"],

  // Possessives and initialisms
  ["Cat's tail", "kˈæts tˈeɪl"],
  ["X's mark", "ˈɛksᵻz mˈɑːɹk"],
  ["U.S.A.", "jˈuːˈɛsˈeɪ."],
  ["A.B.C", "ˈeɪbˈiːsˈiː"],
]);
/**
 * Fixtures for the "en-gb" suite: input text -> expected phoneme string.
 * Keys must be unique; a repeated key silently replaces the earlier entry
 * in the Map and that case never runs.
 */
const B_TEST_CASES = new Map([
  // Curly single quotes
  ["‘Hello’", "həlˈəʊ"],
  ["‘Test’ and ‘Example’", "tˈɛst and ɛɡzˈampəl"],

  // Guillemets
  ["«Bonjour»", '"bɔːnʒˈʊə"'],
  ["«Test «nested» quotes»", '"tˈɛst "nˈɛstɪd" kwˈəʊts"'],

  // Parentheses
  ["(Hello)", "«həlˈəʊ»"],
  ["(Nested (Parentheses))", "«nˈɛstɪd «pəɹˈɛnθəsˌiːz»»"],

  // Japanese / Chinese characters and punctuation
  ["こんにちは、世界!", "dʒˈapəniːzlˌɛtə dʒˈapəniːzlˌɛtə dʒˈapəniːzlˌɛtə dʒˈapəniːzlˌɛtə dʒˈapəniːzlˌɛtə, tʃˈaɪniːzlˌɛtə tʃˈaɪniːzlˌɛtə!"],
  ["これはテストです:はい?", "dʒˈapəniːzlˌɛtə dʒˈapəniːzlˌɛtə dʒˈapəniːzlˌɛtə dʒˈapəniːzlˌɛtə dʒˈapəniːzlˌɛtə dʒˈapəniːzlˌɛtə dʒˈapəniːzlˌɛtə dʒˈapəniːzlˌɛtə: dʒˈapəniːzlˌɛtə dʒˈapəniːzlˌɛtə?"],

  // Whitespace
  ["Hello World", "həlˈəʊ wˈɜːld"],
  // NOTE(review): this key used to be byte-identical to the entry above, so
  // the Map kept only one of the two. Restored as a run of three spaces
  // (written with an escape so whitespace-collapsing tools cannot merge it
  // again) - confirm against the intended fixture.
  ["Hello \u0020 World", "həlˈəʊ wˈɜːld"],
  ["Hello\n \nWorld", "həlˈəʊ wˈɜːld"],

  // Titles and abbreviations
  ["Dr. Smith", "dˈɒktə smˈɪθ"],
  ["DR. Brown", "dˈɒktə bɹˈaʊn"],
  ["Mr. Smith", "mˈɪstə smˈɪθ"],
  ["MR. Anderson", "mˈɪstəɹ ˈandəsən"],
  ["Ms. Taylor", "mˈɪs tˈeɪlə"],
  ["MS. Carter", "mˈɪs kˈɑːtə"],
  ["Mrs. Johnson", "mˈɪsɪz dʒˈɒnsən"],
  ["Apples, oranges, etc.", "ˈapəlz, ˈɒɹɪndʒɪz, ɛtsˈɛtɹə"],
  ["Apples, etc. Pears.", "ˈapəlz, ɛtsˈɛtɹə. pˈeəz."],

  // Numbers, times, currency
  ["1990", "nˈaɪntiːn nˈaɪnti"],
  ["12:34", "twˈɛlv θˈɜːti fˈɔː"],
  ["1,000", "wˈɒn θˈaʊzənd"],
  ["12,345,678", "twˈɛlv mˈɪliən θɹˈiː hˈʌndɹɪdən fˈɔːti fˈaɪv θˈaʊzənd sˈɪks hˈʌndɹɪdən sˈɛvənti ˈeɪt"],
  ["$100", "wˈɒn hˈʌndɹɪd dˈɒləz"],
  ["£1.50", "wˈɒn pˈaʊnd and fˈɪfti pˈɛns"],
  ["12.34", "twˈɛlv pˈɔɪnt θɹˈiː fˈɔː"],
  ["0.01", "zˈiəɹəʊ pˈɔɪnt zˈiəɹəʊ wˈɒn"],

  // Possessives
  ["Cat's tail", "kˈats tˈeɪl"],
  ["X's mark", "ˈɛksɪz mˈɑːk"],
]);
// Registers one test per fixture, grouped by dialect. Each test awaits
// `phonemize` and compares the result with the expected phoneme string.
describe("phonemize", () => {
  // [suite label, fixtures, extra arguments passed after the input text]
  const suites = [
    ["en-us", A_TEST_CASES, []],
    ["en-gb", B_TEST_CASES, ["b"]],
  ];

  suites.forEach(([label, fixtures, extraArgs]) => {
    describe(label, () => {
      // Map#forEach visits entries in insertion order as (value, key).
      fixtures.forEach((expected, input) => {
        test(`phonemize("${input}")`, async () => {
          const actual = await phonemize(input, ...extraArgs);
          expect(actual).toEqual(expected);
        });
      });
    });
  });
});
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment