Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
open-webui
Commits
f245c626
"...testcase/git@developer.sourcefind.cn:OpenDAS/nni.git" did not exist on "07dd4c54455b0d8e71d61f65a002d83d5f65455d"
Commit
f245c626
authored
Feb 11, 2024
by
Timothy J. Baek
Browse files
feat: whisper voice input
parent
c6c69924
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
219 additions
and
76 deletions
+219
-76
src/lib/components/chat/MessageInput.svelte
src/lib/components/chat/MessageInput.svelte
+164
-53
src/lib/components/chat/Messages/ResponseMessage.svelte
src/lib/components/chat/Messages/ResponseMessage.svelte
+3
-3
src/lib/components/chat/Settings/Voice.svelte
src/lib/components/chat/Settings/Voice.svelte
+52
-20
No files found.
src/lib/components/chat/MessageInput.svelte
View file @
f245c626
...
@@ -35,7 +35,6 @@
...
@@ -35,7 +35,6 @@
export let fileUploadEnabled = true;
export let fileUploadEnabled = true;
export let speechRecognitionEnabled = true;
export let speechRecognitionEnabled = true;
export let speechRecognitionListening = false;
export let prompt = '';
export let prompt = '';
export let messages = [];
export let messages = [];
...
@@ -51,62 +50,170 @@
...
@@ -51,62 +50,170 @@
}
}
}
}
// MediaRecorder for the current microphone capture; assigned in startRecording
// and stopped from monitorSilence / speechRecognitionHandler.
let mediaRecorder;
// Data pieces pushed by the recorder's `dataavailable` handler; assembled into
// a Blob when the recorder stops.
let audioChunks = [];
// True while voice input is active; the textarea placeholder ("Listening...")
// and the microphone button icon are rendered from this flag.
let isRecording = false;
// Assigned to `analyser.minDecibels` in monitorSilence.
// NOTE(review): presumably chosen so that quiet background noise reads as
// silence — confirm the value against real microphones.
const MIN_DECIBELS = -45;
/**
 * Captures microphone audio with a MediaRecorder and, once the recorder stops,
 * uploads the recording for transcription and places the resulting text in
 * `prompt` (auto-submitting when `$settings.speechAutoSend` is enabled).
 *
 * Resolves without recording (after showing a toast) when the microphone
 * cannot be opened.
 *
 * @returns {Promise<void>}
 */
const startRecording = async () => {
	let stream;
	try {
		stream = await navigator.mediaDevices.getUserMedia({ audio: true });
	} catch (error) {
		// Permission denied, no input device, insecure context, ...
		toast.error(`Permission denied when accessing microphone: ${error}`);
		return;
	}

	// Keep a local reference: the shared `mediaRecorder` variable may point to
	// a newer recorder by the time this one's callbacks run.
	const recorder = new MediaRecorder(stream);
	mediaRecorder = recorder;

	recorder.onstart = () => {
		isRecording = true;
		console.log('Recording started');
	};

	recorder.ondataavailable = (event) => audioChunks.push(event.data);

	recorder.onstop = async () => {
		isRecording = false;
		console.log('Recording stopped');

		// Release the microphone; otherwise the browser keeps the capture
		// (and its recording indicator) alive after the recorder has stopped.
		stream.getTracks().forEach((track) => track.stop());

		// Take ownership of the collected chunks and reset the shared buffer
		// right away, so a recording started while the transcription request
		// is still in flight begins with an empty buffer and is not wiped
		// when this handler finishes.
		const recordedChunks = audioChunks;
		audioChunks = [];

		// NOTE(review): the recorder is created without a mimeType, so the data
		// is in the browser's default container, which is presumably not WAV
		// despite the label below — confirm the transcription backend copes.
		const audioBlob = new Blob(recordedChunks, { type: 'audio/wav' });
		const file = blobToFile(audioBlob, 'recording.wav');

		let res = null;
		try {
			res = await transcribeAudio(localStorage.token, file);
		} catch (error) {
			toast.error(error);
		}

		if (res) {
			prompt = res.text;

			// Wait for the DOM to reflect the new prompt before focusing.
			await tick();
			const inputElement = document.getElementById('chat-textarea');
			inputElement?.focus();

			if (prompt !== '' && $settings?.speechAutoSend === true) {
				submitPrompt(prompt, user);
			}
		}
	};

	// Start recording
	recorder.start();

	// Stop automatically after a period of silence
	monitorSilence(stream);
};
/**
 * Watches the microphone stream and stops the current recorder once no sound
 * above `MIN_DECIBELS` has been detected for three seconds.
 *
 * The polling loop also ends (and the AudioContext is closed) as soon as the
 * recorder is found inactive, e.g. after the user stopped it manually.
 *
 * @param {MediaStream} stream - stream being recorded by `mediaRecorder`
 */
const monitorSilence = (stream) => {
	const SILENCE_TIMEOUT_MS = 3000;

	// Bind to the recorder that is active right now. The shared variable can be
	// reassigned by a later recording, and this loop must never stop that one.
	const recorder = mediaRecorder;

	const audioContext = new AudioContext();
	const audioStreamSource = audioContext.createMediaStreamSource(stream);
	const analyser = audioContext.createAnalyser();
	analyser.minDecibels = MIN_DECIBELS;
	audioStreamSource.connect(analyser);

	const bufferLength = analyser.frequencyBinCount;
	const domainData = new Uint8Array(bufferLength);

	let lastSoundTime = Date.now();

	const detectSound = () => {
		// Recording already ended elsewhere: release the audio graph instead of
		// polling on every animation frame for the rest of the page's life.
		if (!recorder || recorder.state === 'inactive') {
			audioContext.close();
			return;
		}

		analyser.getByteFrequencyData(domainData);

		// Any non-zero bin means some frequency was louder than minDecibels.
		if (domainData.some((value) => value > 0)) {
			lastSoundTime = Date.now();
		}

		if (Date.now() - lastSoundTime > SILENCE_TIMEOUT_MS) {
			recorder.stop();
			audioContext.close();
			return;
		}

		window.requestAnimationFrame(detectSound);
	};

	window.requestAnimationFrame(detectSound);
};
/**
 * Debug helper: offers the recorded audio as a `recording.wav` download.
 *
 * @param {Blob} blob - recorded audio
 */
const saveRecording = (blob) => {
	const url = URL.createObjectURL(blob);
	const a = document.createElement('a');
	a.style.display = 'none';
	a.href = url;
	a.download = 'recording.wav';

	document.body.appendChild(a);
	try {
		a.click();
	} finally {
		// Do not leave one hidden anchor per download behind in the document,
		// and always give the object URL back.
		a.remove();
		URL.revokeObjectURL(url);
	}
};
const speechRecognitionHandler = () => {
const speechRecognitionHandler = () => {
// Check if SpeechRecognition is supported
// Check if SpeechRecognition is supported
if (speechRecognitionListening) {
if (isRecording) {
speechRecognition.stop();
if (speechRecognition) {
} else {
speechRecognition.stop();
if ('SpeechRecognition' in window || 'webkitSpeechRecognition' in window) {
}
// Create a SpeechRecognition object
speechRecognition = new (window.SpeechRecognition || window.webkitSpeechRecognition)();
// Set continuous to true for continuous recognition
speechRecognition.continuous = true;
// Set the timeout for turning off the recognition after inactivity (in milliseconds)
const inactivityTimeout = 3000; // 3 seconds
let timeoutId;
// Start recognition
speechRecognition.start();
speechRecognitionListening = true;
// Event triggered when speech is recognized
speechRecognition.onresult = function (event) {
// Clear the inactivity timeout
clearTimeout(timeoutId);
// Handle recognized speech
console.log(event);
const transcript = event.results[Object.keys(event.results).length - 1][0].transcript;
prompt = `${prompt}${transcript}`;
// Restart the inactivity timeout
timeoutId = setTimeout(() => {
console.log('Speech recognition turned off due to inactivity.');
speechRecognition.stop();
}, inactivityTimeout);
};
// Event triggered when recognition is ended
if (mediaRecorder) {
speechRecognition.onend = function () {
mediaRecorder.stop();
// Restart recognition after it ends
}
console.log('recognition ended');
} else {
speechRecognitionListening = false;
isRecording = true;
if (prompt !== '' && $settings?.speechAutoSend === true) {
submitPrompt(prompt, user);
}
};
// Event triggered when an error occurs
if ($settings?.voice?.STTEngine ?? '' !== '') {
speechRecognition.onerror = function (event) {
startRecording();
console.log(event);
toast.error(`Speech recognition error: ${event.error}`);
speechRecognitionListening = false;
};
} else {
} else {
toast.error('SpeechRecognition API is not supported in this browser.');
if ('SpeechRecognition' in window || 'webkitSpeechRecognition' in window) {
// Create a SpeechRecognition object
speechRecognition = new (window.SpeechRecognition || window.webkitSpeechRecognition)();
// Set continuous to true for continuous recognition
speechRecognition.continuous = true;
// Set the timeout for turning off the recognition after inactivity (in milliseconds)
const inactivityTimeout = 3000; // 3 seconds
let timeoutId;
// Start recognition
speechRecognition.start();
// Event triggered when speech is recognized
speechRecognition.onresult = async (event) => {
// Clear the inactivity timeout
clearTimeout(timeoutId);
// Handle recognized speech
console.log(event);
const transcript = event.results[Object.keys(event.results).length - 1][0].transcript;
prompt = `${prompt}${transcript}`;
await tick();
const inputElement = document.getElementById('chat-textarea');
inputElement?.focus();
// Restart the inactivity timeout
timeoutId = setTimeout(() => {
console.log('Speech recognition turned off due to inactivity.');
speechRecognition.stop();
}, inactivityTimeout);
};
// Event triggered when recognition is ended
speechRecognition.onend = function () {
// Restart recognition after it ends
console.log('recognition ended');
isRecording = false;
if (prompt !== '' && $settings?.speechAutoSend === true) {
submitPrompt(prompt, user);
}
};
// Event triggered when an error occurs
speechRecognition.onerror = function (event) {
console.log(event);
toast.error(`Speech recognition error: ${event.error}`);
isRecording = false;
};
} else {
toast.error('SpeechRecognition API is not supported in this browser.');
}
}
}
}
}
};
};
...
@@ -550,7 +657,7 @@
...
@@ -550,7 +657,7 @@
: ' pl-4'} rounded-xl resize-none h-[48px]"
: ' pl-4'} rounded-xl resize-none h-[48px]"
placeholder={chatInputPlaceholder !== ''
placeholder={chatInputPlaceholder !== ''
? chatInputPlaceholder
? chatInputPlaceholder
:
speechRecognitionListen
ing
:
isRecord
ing
? 'Listening...'
? 'Listening...'
: 'Send a message'}
: 'Send a message'}
bind:value={prompt}
bind:value={prompt}
...
@@ -659,6 +766,10 @@
...
@@ -659,6 +766,10 @@
e.target.style.height = Math.min(e.target.scrollHeight, 200) + 'px';
e.target.style.height = Math.min(e.target.scrollHeight, 200) + 'px';
user = null;
user = null;
}}
}}
on:focus={(e) => {
e.target.style.height = '';
e.target.style.height = Math.min(e.target.scrollHeight, 200) + 'px';
}}
on:paste={(e) => {
on:paste={(e) => {
const clipboardData = e.clipboardData || window.clipboardData;
const clipboardData = e.clipboardData || window.clipboardData;
...
@@ -696,7 +807,7 @@
...
@@ -696,7 +807,7 @@
speechRecognitionHandler();
speechRecognitionHandler();
}}
}}
>
>
{#if
speechRecognitionListen
ing}
{#if
isRecord
ing}
<svg
<svg
class=" w-5 h-5 translate-y-[0.5px]"
class=" w-5 h-5 translate-y-[0.5px]"
fill="currentColor"
fill="currentColor"
...
...
src/lib/components/chat/Messages/ResponseMessage.svelte
View file @
f245c626
...
@@ -148,7 +148,7 @@
...
@@ -148,7 +148,7 @@
} else {
} else {
speaking = true;
speaking = true;
if ($settings?.
speech?.e
ngine === 'openai') {
if ($settings?.
voice?.TTSE
ngine === 'openai') {
loadingSpeech = true;
loadingSpeech = true;
const sentences = extractSentences(message.content).reduce((mergedTexts, currentText) => {
const sentences = extractSentences(message.content).reduce((mergedTexts, currentText) => {
...
@@ -179,7 +179,7 @@
...
@@ -179,7 +179,7 @@
for (const [idx, sentence] of sentences.entries()) {
for (const [idx, sentence] of sentences.entries()) {
const res = await synthesizeOpenAISpeech(
const res = await synthesizeOpenAISpeech(
localStorage.token,
localStorage.token,
$settings?.
speech
?.speaker,
$settings?.
voice
?.speaker,
sentence
sentence
).catch((error) => {
).catch((error) => {
toast.error(error);
toast.error(error);
...
@@ -204,7 +204,7 @@
...
@@ -204,7 +204,7 @@
clearInterval(getVoicesLoop);
clearInterval(getVoicesLoop);
const voice =
const voice =
voices?.filter((v) => v.name === $settings?.
speech
?.speaker)?.at(0) ?? undefined;
voices?.filter((v) => v.name === $settings?.
voice
?.speaker)?.at(0) ?? undefined;
const speak = new SpeechSynthesisUtterance(message.content);
const speak = new SpeechSynthesisUtterance(message.content);
...
...
src/lib/components/chat/Settings/Voice.svelte
View file @
f245c626
<script lang="ts">
<script lang="ts">
import { createEventDispatcher, onMount } from 'svelte';
import { createEventDispatcher, onMount } from 'svelte';
import toast from 'svelte-french-toast';
const dispatch = createEventDispatcher();
const dispatch = createEventDispatcher();
export let saveSettings: Function;
export let saveSettings: Function;
// Voice
// Voice
let STTEngines = ['', 'openai'];
let STTEngine = '';
let conversationMode = false;
let conversationMode = false;
let speechAutoSend = false;
let speechAutoSend = false;
let responseAutoPlayback = false;
let responseAutoPlayback = false;
let
e
ngines = ['', 'openai'];
let
TTSE
ngines = ['', 'openai'];
let
e
ngine = '';
let
TTSE
ngine = '';
let voices = [];
let voices = [];
let speaker = '';
let speaker = '';
...
@@ -70,10 +74,11 @@
...
@@ -70,10 +74,11 @@
speechAutoSend = settings.speechAutoSend ?? false;
speechAutoSend = settings.speechAutoSend ?? false;
responseAutoPlayback = settings.responseAutoPlayback ?? false;
responseAutoPlayback = settings.responseAutoPlayback ?? false;
engine = settings?.speech?.engine ?? '';
STTEngine = settings?.voice?.STTEngine ?? '';
speaker = settings?.speech?.speaker ?? '';
TTSEngine = settings?.voice?.TTSEngine ?? '';
speaker = settings?.voice?.speaker ?? '';
if (
e
ngine === 'openai') {
if (
TTSE
ngine === 'openai') {
getOpenAIVoices();
getOpenAIVoices();
} else {
} else {
getWebAPIVoices();
getWebAPIVoices();
...
@@ -85,37 +90,37 @@
...
@@ -85,37 +90,37 @@
class="flex flex-col h-full justify-between space-y-3 text-sm"
class="flex flex-col h-full justify-between space-y-3 text-sm"
on:submit|preventDefault={() => {
on:submit|preventDefault={() => {
saveSettings({
saveSettings({
speech: {
voice: {
engine: engine !== '' ? engine : undefined,
STTEngine: STTEngine !== '' ? STTEngine : undefined,
TTSEngine: TTSEngine !== '' ? TTSEngine : undefined,
speaker: speaker !== '' ? speaker : undefined
speaker: speaker !== '' ? speaker : undefined
}
}
});
});
dispatch('save');
dispatch('save');
}}
}}
>
>
<div class=" space-y-3">
<div class=" space-y-3
pr-1.5 overflow-y-scroll max-h-80
">
<div>
<div>
<div class=" mb-1 text-sm font-medium">TT
S
Settings</div>
<div class=" mb-1 text-sm font-medium">
S
TT Settings</div>
<div class=" py-0.5 flex w-full justify-between">
<div class=" py-0.5 flex w-full justify-between">
<div class=" self-center text-xs font-medium">Speech Engine</div>
<div class=" self-center text-xs font-medium">Speech
-to-Text
Engine</div>
<div class="flex items-center relative">
<div class="flex items-center relative">
<select
<select
class="w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
class="w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
bind:value={
e
ngine}
bind:value={
STTE
ngine}
placeholder="Select a mode"
placeholder="Select a mode"
on:change={(e) => {
on:change={(e) => {
if (e.target.value === 'openai') {
if (e.target.value !== '') {
getOpenAIVoices();
navigator.mediaDevices.getUserMedia({ audio: true }).catch(function (err) {
speaker = 'alloy';
toast.error(`Permission denied when accessing microphone: ${err}`);
} else {
STTEngine = '';
getWebAPIVoices();
});
speaker = '';
}
}
}}
}}
>
>
<option value="">Default (Web API)</option>
<option value="">Default (Web API)</option>
<option value="
openai">Open AI
</option>
<option value="
whisper-local">Whisper (Local)
</option>
</select>
</select>
</div>
</div>
</div>
</div>
...
@@ -155,6 +160,33 @@
...
@@ -155,6 +160,33 @@
{/if}
{/if}
</button>
</button>
</div>
</div>
</div>
<div>
<div class=" mb-1 text-sm font-medium">TTS Settings</div>
<div class=" py-0.5 flex w-full justify-between">
<div class=" self-center text-xs font-medium">Text-to-Speech Engine</div>
<div class="flex items-center relative">
<select
class="w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
bind:value={TTSEngine}
placeholder="Select a mode"
on:change={(e) => {
if (e.target.value === 'openai') {
getOpenAIVoices();
speaker = 'alloy';
} else {
getWebAPIVoices();
speaker = '';
}
}}
>
<option value="">Default (Web API)</option>
<option value="openai">Open AI</option>
</select>
</div>
</div>
<div class=" py-0.5 flex w-full justify-between">
<div class=" py-0.5 flex w-full justify-between">
<div class=" self-center text-xs font-medium">Auto-playback response</div>
<div class=" self-center text-xs font-medium">Auto-playback response</div>
...
@@ -177,7 +209,7 @@
...
@@ -177,7 +209,7 @@
<hr class=" dark:border-gray-700" />
<hr class=" dark:border-gray-700" />
{#if
e
ngine === ''}
{#if
TTSE
ngine === ''}
<div>
<div>
<div class=" mb-2.5 text-sm font-medium">Set Voice</div>
<div class=" mb-2.5 text-sm font-medium">Set Voice</div>
<div class="flex w-full">
<div class="flex w-full">
...
@@ -196,7 +228,7 @@
...
@@ -196,7 +228,7 @@
</div>
</div>
</div>
</div>
</div>
</div>
{:else if
e
ngine === 'openai'}
{:else if
TTSE
ngine === 'openai'}
<div>
<div>
<div class=" mb-2.5 text-sm font-medium">Set Voice</div>
<div class=" mb-2.5 text-sm font-medium">Set Voice</div>
<div class="flex w-full">
<div class="flex w-full">
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment