Unverified commit 9d5e3e2a, authored by Timothy Jaeryang Baek, committed by GitHub
Browse files

Merge pull request #4299 from open-webui/dev-elevenlabs

feat: Fetch ElevenLabs voice ID by name
parents 67efd0dd 7f260938
...@@ -10,12 +10,12 @@ from fastapi import ( ...@@ -10,12 +10,12 @@ from fastapi import (
File, File,
Form, Form,
) )
from fastapi.responses import StreamingResponse, JSONResponse, FileResponse from fastapi.responses import StreamingResponse, JSONResponse, FileResponse
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel from pydantic import BaseModel
from typing import List
import uuid import uuid
import requests import requests
import hashlib import hashlib
...@@ -31,6 +31,7 @@ from utils.utils import ( ...@@ -31,6 +31,7 @@ from utils.utils import (
) )
from utils.misc import calculate_sha256 from utils.misc import calculate_sha256
from config import ( from config import (
SRC_LOG_LEVELS, SRC_LOG_LEVELS,
CACHE_DIR, CACHE_DIR,
...@@ -252,15 +253,15 @@ async def speech(request: Request, user=Depends(get_verified_user)): ...@@ -252,15 +253,15 @@ async def speech(request: Request, user=Depends(get_verified_user)):
) )
elif app.state.config.TTS_ENGINE == "elevenlabs": elif app.state.config.TTS_ENGINE == "elevenlabs":
payload = None payload = None
try: try:
payload = json.loads(body.decode("utf-8")) payload = json.loads(body.decode("utf-8"))
except Exception as e: except Exception as e:
log.exception(e) log.exception(e)
pass raise HTTPException(status_code=400, detail="Invalid JSON payload")
url = f"https://api.elevenlabs.io/v1/text-to-speech/{payload['voice']}" voice_id = payload.get("voice", "")
url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
headers = { headers = {
"Accept": "audio/mpeg", "Accept": "audio/mpeg",
...@@ -435,3 +436,69 @@ def transcribe( ...@@ -435,3 +436,69 @@ def transcribe(
status_code=status.HTTP_400_BAD_REQUEST, status_code=status.HTTP_400_BAD_REQUEST,
detail=ERROR_MESSAGES.DEFAULT(e), detail=ERROR_MESSAGES.DEFAULT(e),
) )
def get_available_models() -> List[dict]:
    """Return the TTS models offered by the configured engine.

    For the "openai" engine a fixed list is returned. For "elevenlabs" the
    list is fetched from the ElevenLabs models endpoint using the configured
    API key. Any other engine value, or a failed ElevenLabs request, yields
    an empty list.

    Returns:
        A list of dicts. OpenAI entries carry an "id" key; ElevenLabs
        entries carry "name" and "id" (taken from the upstream "model_id").
    """
    engine = app.state.config.TTS_ENGINE

    if engine == "openai":
        return [{"id": "tts-1"}, {"id": "tts-1-hd"}]

    if engine == "elevenlabs":
        headers = {
            "xi-api-key": app.state.config.TTS_API_KEY,
            "Content-Type": "application/json",
        }
        try:
            response = requests.get(
                "https://api.elevenlabs.io/v1/models",
                headers=headers,
                # Without a timeout requests waits indefinitely; a timeout
                # raises a RequestException subclass handled below.
                timeout=10,
            )
            response.raise_for_status()
            return [
                {"name": model["name"], "id": model["model_id"]}
                for model in response.json()
            ]
        except requests.RequestException as e:
            # Best effort: a failed lookup is logged and reported as "no models".
            log.error(f"Error fetching models: {str(e)}")

    return []
@app.get("/models")
async def get_models(user=Depends(get_verified_user)):
    """Serve the TTS model list as ``{"models": [...]}``.

    The ``user`` argument exists only so the ``get_verified_user`` dependency
    is resolved for the request; its value is not read here.
    """
    # NOTE(review): get_available_models() may issue a synchronous HTTP
    # request from inside this coroutine - confirm that is acceptable.
    available_models = get_available_models()
    return {"models": available_models}
def get_available_voices() -> List[dict]:
    """Return the TTS voices offered by the configured engine.

    For the "openai" engine a fixed list of six voices is returned. For
    "elevenlabs" the list is fetched from the ElevenLabs voices endpoint
    using the configured API key. Any other engine value, or a failed
    ElevenLabs request, yields an empty list.

    Returns:
        A list of ``{"name": ..., "id": ...}`` dicts. For ElevenLabs the
        "id" is the upstream "voice_id".
    """
    engine = app.state.config.TTS_ENGINE

    if engine == "openai":
        return [
            {"name": "alloy", "id": "alloy"},
            {"name": "echo", "id": "echo"},
            {"name": "fable", "id": "fable"},
            {"name": "onyx", "id": "onyx"},
            {"name": "nova", "id": "nova"},
            {"name": "shimmer", "id": "shimmer"},
        ]

    if engine == "elevenlabs":
        headers = {
            "xi-api-key": app.state.config.TTS_API_KEY,
            "Content-Type": "application/json",
        }
        try:
            response = requests.get(
                "https://api.elevenlabs.io/v1/voices",
                headers=headers,
                # Without a timeout requests waits indefinitely; a timeout
                # raises a RequestException subclass handled below.
                timeout=10,
            )
            response.raise_for_status()
            voices_data = response.json()
            # A payload without a "voices" key is treated as "no voices".
            return [
                {"name": voice["name"], "id": voice["voice_id"]}
                for voice in voices_data.get("voices", [])
            ]
        except requests.RequestException as e:
            # Best effort: a failed lookup is logged and reported as "no voices".
            log.error(f"Error fetching voices: {str(e)}")

    return []
@app.get("/voices")
async def get_voices(user=Depends(get_verified_user)):
    """Serve the TTS voice list as ``{"voices": [...]}``.

    The ``user`` argument exists only so the ``get_verified_user`` dependency
    is resolved for the request; its value is not read here.
    """
    # NOTE(review): get_available_voices() may issue a synchronous HTTP
    # request from inside this coroutine - confirm that is acceptable.
    available_voices = get_available_voices()
    return {"voices": available_voices}
...@@ -131,3 +131,59 @@ export const synthesizeOpenAISpeech = async ( ...@@ -131,3 +131,59 @@ export const synthesizeOpenAISpeech = async (
return res; return res;
}; };
/**
 * Fetch the TTS models exposed by the audio backend.
 *
 * @param token - Bearer token sent in the `Authorization` header.
 * @returns The parsed JSON body of a successful response.
 * @throws The `detail` field of the backend's error body when the response is
 *   not OK; otherwise the failure's `message` (network error, unparsable body)
 *   or a generic fallback string. A bare value rather than an `Error` is
 *   thrown on purpose: callers pass whatever they catch straight to a toast.
 */
export const getModels = async (token: string = '') => {
	try {
		const res = await fetch(`${AUDIO_API_BASE_URL}/models`, {
			method: 'GET',
			headers: {
				'Content-Type': 'application/json',
				Authorization: `Bearer ${token}`
			}
		});
		const body = await res.json();
		if (!res.ok) throw body;
		return body;
	} catch (err: unknown) {
		console.log(err);
		const info = (typeof err === 'object' && err !== null ? err : {}) as {
			detail?: unknown;
			message?: unknown;
		};
		// `||` rather than `??`: an empty detail/message is useless to the user,
		// and a failure must never be reported as a silent `null` result.
		throw info.detail || info.message || 'Failed to fetch audio models';
	}
};
/**
 * Fetch the TTS voices exposed by the audio backend.
 *
 * @param token - Bearer token sent in the `Authorization` header.
 * @returns The parsed JSON body of a successful response.
 * @throws The `detail` field of the backend's error body when the response is
 *   not OK; otherwise the failure's `message` (network error, unparsable body)
 *   or a generic fallback string. A bare value rather than an `Error` is
 *   thrown on purpose: callers pass whatever they catch straight to a toast.
 */
export const getVoices = async (token: string = '') => {
	try {
		const res = await fetch(`${AUDIO_API_BASE_URL}/voices`, {
			method: 'GET',
			headers: {
				'Content-Type': 'application/json',
				Authorization: `Bearer ${token}`
			}
		});
		const body = await res.json();
		if (!res.ok) throw body;
		return body;
	} catch (err: unknown) {
		console.log(err);
		const info = (typeof err === 'object' && err !== null ? err : {}) as {
			detail?: unknown;
			message?: unknown;
		};
		// `||` rather than `??`: an empty detail/message is useless to the user,
		// and a failure must never be reported as a silent `null` result.
		throw info.detail || info.message || 'Failed to fetch audio voices';
	}
};
<script lang="ts"> <script lang="ts">
import { getAudioConfig, updateAudioConfig } from '$lib/apis/audio';
import { user, settings, config } from '$lib/stores';
import { createEventDispatcher, onMount, getContext } from 'svelte';
import { toast } from 'svelte-sonner'; import { toast } from 'svelte-sonner';
import Switch from '$lib/components/common/Switch.svelte'; import { createEventDispatcher, onMount, getContext } from 'svelte';
const dispatch = createEventDispatcher();
import { getBackendConfig } from '$lib/apis'; import { getBackendConfig } from '$lib/apis';
import {
getAudioConfig,
updateAudioConfig,
getModels as _getModels,
getVoices as _getVoices
} from '$lib/apis/audio';
import { user, settings, config } from '$lib/stores';
import SensitiveInput from '$lib/components/common/SensitiveInput.svelte'; import SensitiveInput from '$lib/components/common/SensitiveInput.svelte';
const dispatch = createEventDispatcher();
const i18n = getContext('i18n'); const i18n = getContext('i18n');
...@@ -30,30 +36,41 @@ ...@@ -30,30 +36,41 @@
let models = []; let models = [];
let nonLocalVoices = false; let nonLocalVoices = false;
const getOpenAIVoices = () => { const getModels = async () => {
voices = [ if (TTS_ENGINE === '') {
{ name: 'alloy' }, models = [];
{ name: 'echo' }, } else {
{ name: 'fable' }, const res = await _getModels(localStorage.token).catch((e) => {
{ name: 'onyx' }, toast.error(e);
{ name: 'nova' }, });
{ name: 'shimmer' }
];
};
const getOpenAIModels = () => { if (res) {
models = [{ name: 'tts-1' }, { name: 'tts-1-hd' }]; console.log(res);
models = res.models;
}
}
}; };
const getWebAPIVoices = () => { const getVoices = async () => {
const getVoicesLoop = setInterval(async () => { if (TTS_ENGINE === '') {
voices = await speechSynthesis.getVoices(); const getVoicesLoop = setInterval(async () => {
voices = await speechSynthesis.getVoices();
// do your loop
if (voices.length > 0) {
clearInterval(getVoicesLoop);
}
}, 100);
} else {
const res = await _getVoices(localStorage.token).catch((e) => {
toast.error(e);
});
// do your loop if (res) {
if (voices.length > 0) { console.log(res);
clearInterval(getVoicesLoop); voices = res.voices;
} }
}, 100); }
}; };
const updateConfigHandler = async () => { const updateConfigHandler = async () => {
...@@ -101,12 +118,8 @@ ...@@ -101,12 +118,8 @@
STT_MODEL = res.stt.MODEL; STT_MODEL = res.stt.MODEL;
} }
if (TTS_ENGINE === 'openai') { await getVoices();
getOpenAIVoices(); await getModels();
getOpenAIModels();
} else {
getWebAPIVoices();
}
}); });
</script> </script>
...@@ -185,13 +198,15 @@ ...@@ -185,13 +198,15 @@
class=" dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right" class=" dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
bind:value={TTS_ENGINE} bind:value={TTS_ENGINE}
placeholder="Select a mode" placeholder="Select a mode"
on:change={(e) => { on:change={async (e) => {
await updateConfigHandler();
await getVoices();
await getModels();
if (e.target.value === 'openai') { if (e.target.value === 'openai') {
getOpenAIVoices();
TTS_VOICE = 'alloy'; TTS_VOICE = 'alloy';
TTS_MODEL = 'tts-1'; TTS_MODEL = 'tts-1';
} else { } else {
getWebAPIVoices();
TTS_VOICE = ''; TTS_VOICE = '';
TTS_MODEL = ''; TTS_MODEL = '';
} }
...@@ -268,7 +283,7 @@ ...@@ -268,7 +283,7 @@
<datalist id="voice-list"> <datalist id="voice-list">
{#each voices as voice} {#each voices as voice}
<option value={voice.name} /> <option value={voice.id}>{voice.name}</option>
{/each} {/each}
</datalist> </datalist>
</div> </div>
...@@ -279,15 +294,15 @@ ...@@ -279,15 +294,15 @@
<div class="flex w-full"> <div class="flex w-full">
<div class="flex-1"> <div class="flex-1">
<input <input
list="model-list" list="tts-model-list"
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none" class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
bind:value={TTS_MODEL} bind:value={TTS_MODEL}
placeholder="Select a model" placeholder="Select a model"
/> />
<datalist id="model-list"> <datalist id="tts-model-list">
{#each models as model} {#each models as model}
<option value={model.name} /> <option value={model.id} />
{/each} {/each}
</datalist> </datalist>
</div> </div>
...@@ -309,7 +324,7 @@ ...@@ -309,7 +324,7 @@
<datalist id="voice-list"> <datalist id="voice-list">
{#each voices as voice} {#each voices as voice}
<option value={voice.name} /> <option value={voice.id}>{voice.name}</option>
{/each} {/each}
</datalist> </datalist>
</div> </div>
...@@ -320,15 +335,15 @@ ...@@ -320,15 +335,15 @@
<div class="flex w-full"> <div class="flex w-full">
<div class="flex-1"> <div class="flex-1">
<input <input
list="model-list" list="tts-model-list"
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none" class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
bind:value={TTS_MODEL} bind:value={TTS_MODEL}
placeholder="Select a model" placeholder="Select a model"
/> />
<datalist id="model-list"> <datalist id="tts-model-list">
{#each models as model} {#each models as model}
<option value={model.name} /> <option value={model.id} />
{/each} {/each}
</datalist> </datalist>
</div> </div>
......
<script lang="ts"> <script lang="ts">
import { user, settings, config } from '$lib/stores';
import { createEventDispatcher, onMount, getContext } from 'svelte';
import { toast } from 'svelte-sonner'; import { toast } from 'svelte-sonner';
import { createEventDispatcher, onMount, getContext } from 'svelte';
import { user, settings, config } from '$lib/stores';
import { getVoices as _getVoices } from '$lib/apis/audio';
import Switch from '$lib/components/common/Switch.svelte'; import Switch from '$lib/components/common/Switch.svelte';
const dispatch = createEventDispatcher(); const dispatch = createEventDispatcher();
...@@ -20,26 +23,26 @@ ...@@ -20,26 +23,26 @@
let voices = []; let voices = [];
let voice = ''; let voice = '';
const getOpenAIVoices = () => { const getVoices = async () => {
voices = [ if ($config.audio.tts.engine === '') {
{ name: 'alloy' }, const getVoicesLoop = setInterval(async () => {
{ name: 'echo' }, voices = await speechSynthesis.getVoices();
{ name: 'fable' },
{ name: 'onyx' },
{ name: 'nova' },
{ name: 'shimmer' }
];
};
const getWebAPIVoices = () => { // do your loop
const getVoicesLoop = setInterval(async () => { if (voices.length > 0) {
voices = await speechSynthesis.getVoices(); clearInterval(getVoicesLoop);
}
}, 100);
} else {
const res = await _getVoices(localStorage.token).catch((e) => {
toast.error(e);
});
// do your loop if (res) {
if (voices.length > 0) { console.log(res);
clearInterval(getVoicesLoop); voices = res.voices;
} }
}, 100); }
}; };
const toggleResponseAutoPlayback = async () => { const toggleResponseAutoPlayback = async () => {
...@@ -61,11 +64,7 @@ ...@@ -61,11 +64,7 @@
voice = $settings?.audio?.tts?.voice ?? $config.audio.tts.voice ?? ''; voice = $settings?.audio?.tts?.voice ?? $config.audio.tts.voice ?? '';
nonLocalVoices = $settings.audio?.tts?.nonLocalVoices ?? false; nonLocalVoices = $settings.audio?.tts?.nonLocalVoices ?? false;
if ($config.audio.tts.engine === 'openai') { await getVoices();
getOpenAIVoices();
} else {
getWebAPIVoices();
}
}); });
</script> </script>
...@@ -195,7 +194,7 @@ ...@@ -195,7 +194,7 @@
<datalist id="voice-list"> <datalist id="voice-list">
{#each voices as voice} {#each voices as voice}
<option value={voice.name} /> <option value={voice.id}>{voice.name}</option>
{/each} {/each}
</datalist> </datalist>
</div> </div>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment