chenpangpang / open-webui / Commits

Unverified commit 4727e5cb, authored Jun 13, 2024 by Timothy Jaeryang Baek and committed by GitHub on Jun 13, 2024.
Merge pull request #3128 from open-webui/voice-enh

enh: voice

Parents: bdd2ac00, b8136951
Showing 8 changed files with 670 additions and 354 deletions.
backend/main.py (+68, -0)
src/lib/apis/index.ts (+40, -0)
src/lib/components/chat/Chat.svelte (+112, -104)
src/lib/components/chat/MessageInput.svelte (+3, -4)
src/lib/components/chat/MessageInput/CallOverlay.svelte (+413, -244)
src/lib/components/chat/Messages.svelte (+1, -1)
src/lib/components/chat/Settings/Interface.svelte (+32, -0)
src/lib/utils/index.ts (+1, -1)
backend/main.py

@@ -494,6 +494,9 @@ def filter_pipeline(payload, user):
     if "title" in payload:
         del payload["title"]

+    if "task" in payload:
+        del payload["task"]
+
     return payload

@@ -835,6 +838,71 @@ async def generate_search_query(form_data: dict, user=Depends(get_verified_user)
         "messages": [{"role": "user", "content": content}],
         "stream": False,
         "max_tokens": 30,
+        "task": True,
     }

     print(payload)

+    try:
+        payload = filter_pipeline(payload, user)
+    except Exception as e:
+        return JSONResponse(
+            status_code=e.args[0],
+            content={"detail": e.args[1]},
+        )
+
+    if model["owned_by"] == "ollama":
+        return await generate_ollama_chat_completion(
+            OpenAIChatCompletionForm(**payload), user=user
+        )
+    else:
+        return await generate_openai_chat_completion(payload, user=user)
+
+
+@app.post("/api/task/emoji/completions")
+async def generate_emoji(form_data: dict, user=Depends(get_verified_user)):
+    print("generate_emoji")
+
+    model_id = form_data["model"]
+    if model_id not in app.state.MODELS:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail="Model not found",
+        )
+
+    # Check if the user has a custom task model
+    # If the user has a custom task model, use that model
+    if app.state.MODELS[model_id]["owned_by"] == "ollama":
+        if app.state.config.TASK_MODEL:
+            task_model_id = app.state.config.TASK_MODEL
+            if task_model_id in app.state.MODELS:
+                model_id = task_model_id
+    else:
+        if app.state.config.TASK_MODEL_EXTERNAL:
+            task_model_id = app.state.config.TASK_MODEL_EXTERNAL
+            if task_model_id in app.state.MODELS:
+                model_id = task_model_id
+
+    print(model_id)
+    model = app.state.MODELS[model_id]
+
+    template = '''
+Your task is to reflect the speaker's likely facial expression through a fitting emoji. Interpret emotions from the message and reflect their facial expression using fitting, diverse emojis (e.g., 😊, 😢, 😡, 😱).
+
+Message: """{{prompt}}"""
+'''
+
+    content = title_generation_template(template, form_data["prompt"], user.model_dump())
+
+    payload = {
+        "model": model_id,
+        "messages": [{"role": "user", "content": content}],
+        "stream": False,
+        "max_tokens": 4,
+        "chat_id": form_data.get("chat_id", None),
+        "task": True,
+    }
+
+    print(payload)
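For reference, the endpoint added above takes a plain JSON body ({ model, prompt, chat_id? }) and returns an OpenAI-style chat completion capped at 4 tokens. A minimal sketch of smoke-testing it outside the UI; the base URL, token, and 'llama3' model id are placeholders and not part of the diff:

// Hypothetical smoke test for /api/task/emoji/completions; BASE_URL and TOKEN are assumptions.
const BASE_URL = 'http://localhost:8080';
const TOKEN = '<your-api-token>';

async function fetchCallEmoji(model: string, prompt: string): Promise<string | null> {
	const res = await fetch(`${BASE_URL}/api/task/emoji/completions`, {
		method: 'POST',
		headers: {
			Accept: 'application/json',
			'Content-Type': 'application/json',
			Authorization: `Bearer ${TOKEN}`
		},
		body: JSON.stringify({ model, prompt })
	});
	if (!res.ok) throw await res.json();

	const data = await res.json();
	// OpenAI-style response: the emoji is the content of the first choice.
	return data?.choices?.[0]?.message?.content ?? null;
}

fetchCallEmoji('llama3', 'I just got the job!').then(console.log); // e.g. 😊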
src/lib/apis/index.ts

@@ -205,6 +205,46 @@ export const generateTitle = async (
 	return res?.choices[0]?.message?.content.replace(/["']/g, '') ?? 'New Chat';
 };

+export const generateEmoji = async (
+	token: string = '',
+	model: string,
+	prompt: string,
+	chat_id?: string
+) => {
+	let error = null;
+
+	const res = await fetch(`${WEBUI_BASE_URL}/api/task/emoji/completions`, {
+		method: 'POST',
+		headers: {
+			Accept: 'application/json',
+			'Content-Type': 'application/json',
+			Authorization: `Bearer ${token}`
+		},
+		body: JSON.stringify({
+			model: model,
+			prompt: prompt,
+			...(chat_id && { chat_id: chat_id })
+		})
+	})
+		.then(async (res) => {
+			if (!res.ok) throw await res.json();
+			return res.json();
+		})
+		.catch((err) => {
+			console.log(err);
+			if ('detail' in err) {
+				error = err.detail;
+			}
+			return null;
+		});
+
+	if (error) {
+		throw error;
+	}
+
+	return res?.choices[0]?.message?.content.replace(/["']/g, '') ?? null;
+};
+
 export const generateSearchQuery = async (
 	token: string = '',
 	model: string,
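CallOverlay (later in this diff) consumes this helper and then keeps only a genuine pictograph from the model's reply via a Unicode property check. A condensed sketch of that call pattern; 'llama3' is a placeholder model id:

import { generateEmoji } from '$lib/apis';

(async () => {
	// 'llama3' stands in for whatever model id the call overlay passes down.
	const res = await generateEmoji(localStorage.token, 'llama3', 'That is terrible news.');

	// Keep only the first pictographic character, the same filter CallOverlay applies.
	const emoji =
		res && /\p{Extended_Pictographic}/u.test(res)
			? res.match(/\p{Extended_Pictographic}/gu)[0]
			: null;

	console.log(emoji); // e.g. 😢, or null if the model answered with plain text
})();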
src/lib/components/chat/Chat.svelte

@@ -64,6 +64,8 @@
 	export let chatIdProp = '';
 	let loaded = false;

+	const eventTarget = new EventTarget();
+
 	let stopResponseFlag = false;
 	let autoScroll = true;
 	let processing = '';

@@ -300,7 +302,7 @@
 	// Chat functions
 	//////////////////////////

-	const submitPrompt = async (userPrompt, _user = null) => {
+	const submitPrompt = async (userPrompt, { _raw = false } = {}) => {
 		let _responses = [];
 		console.log('submitPrompt', $chatId);

@@ -344,7 +346,6 @@
 			parentId: messages.length !== 0 ? messages.at(-1).id : null,
 			childrenIds: [],
 			role: 'user',
-			user: _user ?? undefined,
 			content: userPrompt,
 			files: _files.length > 0 ? _files : undefined,
 			timestamp: Math.floor(Date.now() / 1000), // Unix epoch

@@ -362,15 +363,13 @@
 			// Wait until history/message have been updated
 			await tick();

-			// Send prompt
-			_responses = await sendPrompt(userPrompt, userMessageId);
+			_responses = await sendPrompt(userPrompt, userMessageId, { newChat: true });
 		}

 		return _responses;
 	};

-	const sendPrompt = async (prompt, parentId, modelId = null, newChat = true) => {
+	const sendPrompt = async (prompt, parentId, { modelId = null, newChat = false } = {}) => {
 		let _responses = [];

 		// If modelId is provided, use it, else use selected model

@@ -490,7 +489,6 @@
 		responseMessage.userContext = userContext;

 		const chatEventEmitter = await getChatEventEmitter(model.id, _chatId);
-
 		if (webSearchEnabled) {
 			await getWebSearchResults(model.id, parentId, responseMessageId);
 		}

@@ -503,8 +501,6 @@
 			}

 			_responses.push(_response);
-
-			console.log('chatEventEmitter', chatEventEmitter);
 			if (chatEventEmitter) clearInterval(chatEventEmitter);
 		} else {
 			toast.error($i18n.t(`Model {{modelId}} not found`, { modelId }));

@@ -513,88 +509,9 @@
 		);

 		await chats.set(await getChatList(localStorage.token));

 		return _responses;
 	};

-	const getWebSearchResults = async (model: string, parentId: string, responseId: string) => {
-		const responseMessage = history.messages[responseId];
-
-		responseMessage.statusHistory = [
-			{
-				done: false,
-				action: 'web_search',
-				description: $i18n.t('Generating search query')
-			}
-		];
-		messages = messages;
-
-		const prompt = history.messages[parentId].content;
-		let searchQuery = await generateSearchQuery(localStorage.token, model, messages, prompt).catch(
-			(error) => {
-				console.log(error);
-				return prompt;
-			}
-		);
-
-		if (!searchQuery) {
-			toast.warning($i18n.t('No search query generated'));
-			responseMessage.statusHistory.push({
-				done: true,
-				error: true,
-				action: 'web_search',
-				description: 'No search query generated'
-			});
-			messages = messages;
-		}
-
-		responseMessage.statusHistory.push({
-			done: false,
-			action: 'web_search',
-			description: $i18n.t(`Searching "{{searchQuery}}"`, { searchQuery })
-		});
-		messages = messages;
-
-		const results = await runWebSearch(localStorage.token, searchQuery).catch((error) => {
-			console.log(error);
-			toast.error(error);
-			return null;
-		});
-
-		if (results) {
-			responseMessage.statusHistory.push({
-				done: true,
-				action: 'web_search',
-				description: $i18n.t('Searched {{count}} sites', { count: results.filenames.length }),
-				query: searchQuery,
-				urls: results.filenames
-			});
-
-			if (responseMessage?.files ?? undefined === undefined) {
-				responseMessage.files = [];
-			}
-
-			responseMessage.files.push({
-				collection_name: results.collection_name,
-				name: searchQuery,
-				type: 'web_search_results',
-				urls: results.filenames
-			});
-			messages = messages;
-		} else {
-			responseMessage.statusHistory.push({
-				done: true,
-				error: true,
-				action: 'web_search',
-				description: 'No search results found'
-			});
-			messages = messages;
-		}
-	};
-
 	const sendPromptOllama = async (model, userPrompt, responseMessageId, _chatId) => {
 		let _response = null;

@@ -676,6 +593,8 @@
 				array.findIndex((i) => JSON.stringify(i) === JSON.stringify(item)) === index
 		);

+		eventTarget.dispatchEvent(new CustomEvent('chat:start'));
+
 		const [res, controller] = await generateChatCompletion(localStorage.token, {
 			model: model.id,
 			messages: messagesBody,

@@ -745,6 +664,9 @@
 						continue;
 					} else {
 						responseMessage.content += data.message.content;
+						eventTarget.dispatchEvent(
+							new CustomEvent('chat', { detail: { content: data.message.content } })
+						);
 						messages = messages;
 					}
 				} else {

@@ -771,21 +693,13 @@
 					messages = messages;

 					if ($settings.notificationEnabled && !document.hasFocus()) {
-						const notification = new Notification(
-							selectedModelfile
-								? `${
-										selectedModelfile.title.charAt(0).toUpperCase() +
-										selectedModelfile.title.slice(1)
-								  }`
-								: `${model.id}`,
-							{
-								body: responseMessage.content,
-								icon: selectedModelfile?.imageUrl ?? `${WEBUI_BASE_URL}/static/favicon.png`
-							}
-						);
+						const notification = new Notification(`${model.id}`, {
+							body: responseMessage.content,
+							icon: `${WEBUI_BASE_URL}/static/favicon.png`
+						});
 					}

-					if ($settings.responseAutoCopy) {
+					if ($settings?.responseAutoCopy ?? false) {
 						copyToClipboard(responseMessage.content);
 					}

@@ -846,6 +760,7 @@
 		stopResponseFlag = false;
 		await tick();

+		eventTarget.dispatchEvent(new CustomEvent('chat:finish'));
 		if (autoScroll) {
 			scrollToBottom();

@@ -887,6 +802,8 @@
 		scrollToBottom();

+		eventTarget.dispatchEvent(new CustomEvent('chat:start'));
+
 		try {
 			const [res, controller] = await generateOpenAIChatCompletion(
 				localStorage.token,

@@ -1007,6 +924,7 @@
 						continue;
 					} else {
 						responseMessage.content += value;
+						eventTarget.dispatchEvent(new CustomEvent('chat', { detail: { content: value } }));
 						messages = messages;
 					}

@@ -1057,6 +975,8 @@
 		stopResponseFlag = false;
 		await tick();

+		eventTarget.dispatchEvent(new CustomEvent('chat:finish'));
+
 		if (autoScroll) {
 			scrollToBottom();
 		}

@@ -1123,9 +1043,12 @@
 		let userPrompt = userMessage.content;

 		if ((userMessage?.models ?? [...selectedModels]).length == 1) {
-			await sendPrompt(userPrompt, userMessage.id, undefined, false);
+			// If user message has only one model selected, sendPrompt automatically selects it for regeneration
+			await sendPrompt(userPrompt, userMessage.id);
 		} else {
-			await sendPrompt(userPrompt, userMessage.id, message.model, false);
+			// If there are multiple models selected, use the model of the response message for regeneration
+			// e.g. many model chat
+			await sendPrompt(userPrompt, userMessage.id, { modelId: message.model });
 		}
 	};

@@ -1191,6 +1114,84 @@
 		}
 	};

+	const getWebSearchResults = async (model: string, parentId: string, responseId: string) => {
+		const responseMessage = history.messages[responseId];
+
+		responseMessage.statusHistory = [
+			{
+				done: false,
+				action: 'web_search',
+				description: $i18n.t('Generating search query')
+			}
+		];
+		messages = messages;
+
+		const prompt = history.messages[parentId].content;
+		let searchQuery = await generateSearchQuery(localStorage.token, model, messages, prompt).catch(
+			(error) => {
+				console.log(error);
+				return prompt;
+			}
+		);
+
+		if (!searchQuery) {
+			toast.warning($i18n.t('No search query generated'));
+			responseMessage.statusHistory.push({
+				done: true,
+				error: true,
+				action: 'web_search',
+				description: 'No search query generated'
+			});
+			messages = messages;
+		}
+
+		responseMessage.statusHistory.push({
+			done: false,
+			action: 'web_search',
+			description: $i18n.t(`Searching "{{searchQuery}}"`, { searchQuery })
+		});
+		messages = messages;
+
+		const results = await runWebSearch(localStorage.token, searchQuery).catch((error) => {
+			console.log(error);
+			toast.error(error);
+			return null;
+		});
+
+		if (results) {
+			responseMessage.statusHistory.push({
+				done: true,
+				action: 'web_search',
+				description: $i18n.t('Searched {{count}} sites', { count: results.filenames.length }),
+				query: searchQuery,
+				urls: results.filenames
+			});
+
+			if (responseMessage?.files ?? undefined === undefined) {
+				responseMessage.files = [];
+			}
+
+			responseMessage.files.push({
+				collection_name: results.collection_name,
+				name: searchQuery,
+				type: 'web_search_results',
+				urls: results.filenames
+			});
+			messages = messages;
+		} else {
+			responseMessage.statusHistory.push({
+				done: true,
+				error: true,
+				action: 'web_search',
+				description: 'No search results found'
+			});
+			messages = messages;
+		}
+	};
+
 	const getTags = async () => {
 		return await getTagsById(localStorage.token, $chatId).catch(async (error) => {
 			return [];

@@ -1206,7 +1207,14 @@
 	</title>
 </svelte:head>

-<CallOverlay {submitPrompt} bind:files />
+<CallOverlay
+	{submitPrompt}
+	{stopResponse}
+	bind:files
+	modelId={selectedModelIds?.at(0) ?? null}
+	chatId={$chatId}
+	{eventTarget}
+/>

 {#if !chatIdProp || (loaded && chatIdProp)}
 	<div
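The eventTarget prop gives CallOverlay a direct feed of the streaming response without threading callbacks through every layer: Chat.svelte dispatches chat:start, chat (with the token delta in detail.content), and chat:finish, and the overlay subscribes in onMount. A self-contained sketch of the same relay pattern outside Svelte, using only the event names and payload shape that appear in this diff:

// Standalone sketch of the relay Chat.svelte and CallOverlay.svelte now share.
const eventTarget = new EventTarget();

// Consumer side (CallOverlay registers these in onMount).
let assistantMessage = '';
eventTarget.addEventListener('chat:start', () => {
	assistantMessage = '';
});
eventTarget.addEventListener('chat', (e) => {
	assistantMessage += (e as CustomEvent).detail.content;
});
eventTarget.addEventListener('chat:finish', () => {
	console.log('final message:', assistantMessage);
});

// Producer side (Chat.svelte fires these while streaming tokens).
eventTarget.dispatchEvent(new CustomEvent('chat:start'));
for (const content of ['Hel', 'lo', '!']) {
	eventTarget.dispatchEvent(new CustomEvent('chat', { detail: { content } }));
}
eventTarget.dispatchEvent(new CustomEvent('chat:finish')); // logs "final message: Hello!"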
src/lib/components/chat/MessageInput.svelte

@@ -348,7 +348,6 @@
 			<Models
 				bind:this={modelsElement}
 				bind:prompt
-				bind:user
 				bind:chatInputPlaceholder
 				{messages}
 				on:select={(e) => {

@@ -467,7 +466,7 @@
 					document.getElementById('chat-textarea')?.focus();

 					if ($settings?.speechAutoSend ?? false) {
-						submitPrompt(prompt, user);
+						submitPrompt(prompt);
 					}
 				}}
 			/>

@@ -476,7 +475,7 @@
 			class="w-full flex gap-1.5"
 			on:submit|preventDefault={() => {
 				// check if selectedModels support image input
-				submitPrompt(prompt, user);
+				submitPrompt(prompt);
 			}}
 		>
 			<div

@@ -718,7 +717,7 @@
 							// Submit the prompt when Enter key is pressed
 							if (prompt !== '' && e.key === 'Enter' && !e.shiftKey) {
-								submitPrompt(prompt, user);
+								submitPrompt(prompt);
 							}
 						}
 					}}
src/lib/components/chat/MessageInput/CallOverlay.svelte

@@ -3,36 +3,170 @@
 	import { onMount, tick, getContext } from 'svelte';

 	import { blobToFile, calculateSHA256, extractSentences, findWordIndices } from '$lib/utils';
+	import { generateEmoji } from '$lib/apis';
 	import { synthesizeOpenAISpeech, transcribeAudio } from '$lib/apis/audio';
 	import { toast } from 'svelte-sonner';

 	import Tooltip from '$lib/components/common/Tooltip.svelte';
 	import VideoInputMenu from './CallOverlay/VideoInputMenu.svelte';
+	import { get } from 'svelte/store';

 	const i18n = getContext('i18n');

+	export let eventTarget: EventTarget;
 	export let submitPrompt: Function;
+	export let stopResponse: Function;
 	export let files;
+	export let chatId;
+	export let modelId;

 	let loading = false;
 	let confirmed = false;
+	let interrupted = false;

+	let emoji = null;
 	let camera = false;
 	let cameraStream = null;

 	let assistantSpeaking = false;
-	let assistantAudio = {};
-	let assistantAudioIdx = null;
-
-	let rmsLevel = 0;
-	let hasStartedSpeaking = false;
+	let chatStreaming = false;
+
+	let assistantMessage = '';
+	let assistantSentences = [];
+	let assistantSentenceAudios = {};
+	let assistantSentenceIdx = -1;
+
+	let audioQueue = [];
+	let emojiQueue = [];
+
+	$: assistantSentences = extractSentences(assistantMessage).reduce((mergedTexts, currentText) => {
+		const lastIndex = mergedTexts.length - 1;
+		if (lastIndex >= 0) {
+			const previousText = mergedTexts[lastIndex];
+			const wordCount = previousText.split(/\s+/).length;
+			if (wordCount < 2) {
+				mergedTexts[lastIndex] = previousText + ' ' + currentText;
+			} else {
+				mergedTexts.push(currentText);
+			}
+		} else {
+			mergedTexts.push(currentText);
+		}
+		return mergedTexts;
+	}, []);

 	let currentUtterance = null;

+	let rmsLevel = 0;
+	let hasStartedSpeaking = false;
+
 	let mediaRecorder;
 	let audioChunks = [];

+	let videoInputDevices = [];
+	let selectedVideoInputDeviceId = null;
+
+	const getVideoInputDevices = async () => {
+		const devices = await navigator.mediaDevices.enumerateDevices();
+		videoInputDevices = devices.filter((device) => device.kind === 'videoinput');
+
+		if (!!navigator.mediaDevices.getDisplayMedia) {
+			videoInputDevices = [
+				...videoInputDevices,
+				{
+					deviceId: 'screen',
+					label: 'Screen Share'
+				}
+			];
+		}
+
+		console.log(videoInputDevices);
+		if (selectedVideoInputDeviceId === null && videoInputDevices.length > 0) {
+			selectedVideoInputDeviceId = videoInputDevices[0].deviceId;
+		}
+	};
+
+	const startCamera = async () => {
+		await getVideoInputDevices();
+
+		if (cameraStream === null) {
+			camera = true;
+			await tick();
+			try {
+				await startVideoStream();
+			} catch (err) {
+				console.error('Error accessing webcam: ', err);
+			}
+		}
+	};
+
+	const startVideoStream = async () => {
+		const video = document.getElementById('camera-feed');
+		if (video) {
+			if (selectedVideoInputDeviceId === 'screen') {
+				cameraStream = await navigator.mediaDevices.getDisplayMedia({
+					video: {
+						cursor: 'always'
+					},
+					audio: false
+				});
+			} else {
+				cameraStream = await navigator.mediaDevices.getUserMedia({
+					video: {
+						deviceId: selectedVideoInputDeviceId ? { exact: selectedVideoInputDeviceId } : undefined
+					}
+				});
+			}
+
+			if (cameraStream) {
+				await getVideoInputDevices();
+				video.srcObject = cameraStream;
+				await video.play();
+			}
+		}
+	};
+
+	const stopVideoStream = async () => {
+		if (cameraStream) {
+			const tracks = cameraStream.getTracks();
+			tracks.forEach((track) => track.stop());
+		}
+		cameraStream = null;
+	};
+
+	const takeScreenshot = () => {
+		const video = document.getElementById('camera-feed');
+		const canvas = document.getElementById('camera-canvas');
+
+		if (!canvas) {
+			return;
+		}
+
+		const context = canvas.getContext('2d');
+
+		// Make the canvas match the video dimensions
+		canvas.width = video.videoWidth;
+		canvas.height = video.videoHeight;
+
+		// Draw the image from the video onto the canvas
+		context.drawImage(video, 0, 0, video.videoWidth, video.videoHeight);
+
+		// Convert the canvas to a data base64 URL and console log it
+		const dataURL = canvas.toDataURL('image/png');
+		console.log(dataURL);
+
+		return dataURL;
+	};
+
+	const stopCamera = async () => {
+		await stopVideoStream();
+		camera = false;
+	};
+
 	const MIN_DECIBELS = -45;
 	const VISUALIZER_BUFFER_LENGTH = 300;

@@ -46,15 +180,6 @@
 		return Math.sqrt(sumSquares / data.length);
 	};

-	const normalizeRMS = (rms) => {
-		rms = rms * 10;
-		const exp = 1.5; // Adjust exponent value; values greater than 1 expand larger numbers more and compress smaller numbers more
-		const scaledRMS = Math.pow(rms, exp);
-
-		// Scale between 0.01 (1%) and 1.0 (100%)
-		return Math.min(1.0, Math.max(0.01, scaledRMS));
-	};
-
 	const analyseAudio = (stream) => {
 		const audioContext = new AudioContext();
 		const audioStreamSource = audioContext.createMediaStreamSource(stream);

@@ -74,12 +199,9 @@
 		const detectSound = () => {
 			const processFrame = () => {
 				if (!mediaRecorder || !$showCallOverlay) {
-					if (mediaRecorder) {
-						mediaRecorder.stop();
-					}
 					return;
 				}
 				analyser.getByteTimeDomainData(timeDomainData);
 				analyser.getByteFrequencyData(domainData);

@@ -89,9 +211,12 @@
 				// Check if initial speech/noise has started
 				const hasSound = domainData.some((value) => value > 0);
 				if (hasSound) {
-					stopAllAudio();
 					hasStartedSpeaking = true;
 					lastSoundTime = Date.now();
+
+					// BIG RED TEXT
+					console.log('%c%s', 'color: red; font-size: 20px;', '🔊 Sound detected');
+
+					stopAllAudio();
 				}

 				// Start silence detection only after initial speech/noise has been detected

@@ -114,181 +239,212 @@
 		detectSound();
 	};

-	const stopAllAudio = () => {
+	const transcribeHandler = async (audioBlob) => {
+		// Create a blob from the audio chunks
+		await tick();
+		const file = blobToFile(audioBlob, 'recording.wav');
+
+		const res = await transcribeAudio(localStorage.token, file).catch((error) => {
+			toast.error(error);
+			return null;
+		});
+
+		if (res) {
+			console.log(res.text);
+			if (res.text !== '') {
+				const _responses = await submitPrompt(res.text, { _raw: true });
+				console.log(_responses);
+			}
+		}
+	};
+
+	const stopAllAudio = async () => {
+		interrupted = true;
+
+		if (chatStreaming) {
+			stopResponse();
+		}
+
 		if (currentUtterance) {
 			speechSynthesis.cancel();
 			currentUtterance = null;
 		}

-		if (assistantAudio[assistantAudioIdx]) {
-			assistantAudio[assistantAudioIdx].pause();
-			assistantAudio[assistantAudioIdx].currentTime = 0;
-		}
+		await tick();
+		emojiQueue = [];
+		audioQueue = [];
+		await tick();

 		const audioElement = document.getElementById('audioElement');
-		audioElement.pause();
-		audioElement.currentTime = 0;
+		if (audioElement) {
+			audioElement.pause();
+			audioElement.currentTime = 0;
+		}

 		assistantSpeaking = false;
 	};

-	const playAudio = (idx) => {
+	const speakSpeechSynthesisHandler = (content) => {
 		if ($showCallOverlay) {
-			return new Promise((res) => {
-				assistantAudioIdx = idx;
-				const audioElement = document.getElementById('audioElement');
-				const audio = assistantAudio[idx];
-
-				audioElement.src = audio.src; // Assume `assistantAudio` has objects with a `src` property
-				audioElement.muted = true;
-
-				audioElement
-					.play()
-					.then(() => {
-						audioElement.muted = false;
-					})
-					.catch((error) => {
-						toast.error(error);
-					});
-
-				audioElement.onended = async (e) => {
-					await new Promise((r) => setTimeout(r, 300));
-
-					if (Object.keys(assistantAudio).length - 1 === idx) {
-						assistantSpeaking = false;
-					}
-
-					res(e);
-				};
+			return new Promise((resolve) => {
+				let voices = [];
+				const getVoicesLoop = setInterval(async () => {
+					voices = await speechSynthesis.getVoices();
+					if (voices.length > 0) {
+						clearInterval(getVoicesLoop);
+
+						const voice =
+							voices
+								?.filter(
+									(v) => v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
+								)
+								?.at(0) ?? undefined;
+
+						currentUtterance = new SpeechSynthesisUtterance(content);
+
+						if (voice) {
+							currentUtterance.voice = voice;
+						}
+
+						speechSynthesis.speak(currentUtterance);
+
+						currentUtterance.onend = async (e) => {
+							await new Promise((r) => setTimeout(r, 100));
+							resolve(e);
+						};
+					}
+				}, 100);
 			});
 		} else {
 			return Promise.resolve();
 		}
 	};

-	const getOpenAISpeech = async (text) => {
-		const res = await synthesizeOpenAISpeech(
-			localStorage.token,
-			$settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice,
-			text
-		).catch((error) => {
-			toast.error(error);
-			assistantSpeaking = false;
-			return null;
-		});
-
-		if (res) {
-			const blob = await res.blob();
-			const blobUrl = URL.createObjectURL(blob);
-			const audio = new Audio(blobUrl);
-			assistantAudio = audio;
-		}
-	};
-
-	const transcribeHandler = async (audioBlob) => {
-		// Create a blob from the audio chunks
-		await tick();
-		const file = blobToFile(audioBlob, 'recording.wav');
-
-		const res = await transcribeAudio(localStorage.token, file).catch((error) => {
-			toast.error(error);
-			return null;
-		});
-
-		if (res) {
-			console.log(res.text);
-
-			if (res.text !== '') {
-				const _responses = await submitPrompt(res.text);
-				console.log(_responses);
-
-				if (_responses.at(0)) {
-					const content = _responses[0];
-					if ((content ?? '').trim() !== '') {
-						assistantSpeakingHandler(content);
-					}
-				}
-			}
-		}
-	};
-
-	const assistantSpeakingHandler = async (content) => {
-		assistantSpeaking = true;
-
-		if (($config.audio.tts.engine ?? '') == '') {
-			let voices = [];
-			const getVoicesLoop = setInterval(async () => {
-				voices = await speechSynthesis.getVoices();
-				if (voices.length > 0) {
-					clearInterval(getVoicesLoop);
-
-					const voice =
-						voices
-							?.filter(
-								(v) => v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
-							)
-							?.at(0) ?? undefined;
-
-					currentUtterance = new SpeechSynthesisUtterance(content);
-
-					if (voice) {
-						currentUtterance.voice = voice;
-					}
-
-					speechSynthesis.speak(currentUtterance);
-				}
-			}, 100);
-		} else if ($config.audio.tts.engine === 'openai') {
-			console.log('openai');
-
-			const sentences = extractSentences(content).reduce((mergedTexts, currentText) => {
-				const lastIndex = mergedTexts.length - 1;
-				if (lastIndex >= 0) {
-					const previousText = mergedTexts[lastIndex];
-					const wordCount = previousText.split(/\s+/).length;
-					if (wordCount < 2) {
-						mergedTexts[lastIndex] = previousText + ' ' + currentText;
-					} else {
-						mergedTexts.push(currentText);
-					}
-				} else {
-					mergedTexts.push(currentText);
-				}
-				return mergedTexts;
-			}, []);
-
-			console.log(sentences);
-
-			let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately
-
-			for (const [idx, sentence] of sentences.entries()) {
-				const res = await synthesizeOpenAISpeech(
-					localStorage.token,
-					$settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice,
-					sentence
-				).catch((error) => {
-					toast.error(error);
-					assistantSpeaking = false;
-					return null;
-				});
-
-				if (res) {
-					const blob = await res.blob();
-					const blobUrl = URL.createObjectURL(blob);
-					const audio = new Audio(blobUrl);
-					assistantAudio[idx] = audio;
-					lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
-				}
-			}
-		}
-	};
+	const playAudio = (audio) => {
+		if ($showCallOverlay) {
+			return new Promise((resolve) => {
+				const audioElement = document.getElementById('audioElement');
+
+				if (audioElement) {
+					audioElement.src = audio.src;
+					audioElement.muted = true;
+
+					audioElement
+						.play()
+						.then(() => {
+							audioElement.muted = false;
+						})
+						.catch((error) => {
+							console.error(error);
+						});
+
+					audioElement.onended = async (e) => {
+						await new Promise((r) => setTimeout(r, 100));
+						resolve(e);
+					};
+				}
+			});
+		} else {
+			return Promise.resolve();
+		}
+	};
+
+	const playAudioHandler = async () => {
+		console.log('playAudioHandler', audioQueue, assistantSpeaking, audioQueue.length > 0);
+		if (!assistantSpeaking && !interrupted && audioQueue.length > 0) {
+			assistantSpeaking = true;
+
+			if ($settings?.showEmojiInCall ?? false) {
+				if (emojiQueue.length > 0) {
+					emoji = emojiQueue.shift();
+					emojiQueue = emojiQueue;
+				}
+			}
+
+			const audioToPlay = audioQueue.shift(); // Shift the audio out from queue before playing.
+			audioQueue = audioQueue;
+			await playAudio(audioToPlay);
+			assistantSpeaking = false;
+		}
+	};
+
+	const setContentAudio = async (content, idx) => {
+		if (assistantSentenceAudios[idx] === undefined) {
+			// Wait for the previous audio to be loaded
+			if (idx > 0) {
+				await new Promise((resolve) => {
+					const check = setInterval(() => {
+						if (
+							assistantSentenceAudios[idx - 1] !== undefined &&
+							assistantSentenceAudios[idx - 1] !== null
+						) {
+							clearInterval(check);
+							resolve();
+						}
+					}, 100);
+				});
+			}
+
+			assistantSentenceAudios[idx] = null;
+
+			if ($settings?.showEmojiInCall ?? false) {
+				const sentenceEmoji = await generateEmoji(localStorage.token, modelId, content);
+
+				if (sentenceEmoji) {
+					// Big red text with content and emoji
+					console.log('%c%s', 'color: blue; font-size: 10px;', `${sentenceEmoji}: ${content}`);
+
+					if (/\p{Extended_Pictographic}/u.test(sentenceEmoji)) {
+						emojiQueue.push(sentenceEmoji.match(/\p{Extended_Pictographic}/gu)[0]);
+						emojiQueue = emojiQueue;
+					}
+				}
+
+				await tick();
+			}
+
+			const res = await synthesizeOpenAISpeech(
+				localStorage.token,
+				$settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice,
+				content
+			).catch((error) => {
+				toast.error(error);
+				assistantSpeaking = false;
+				return null;
+			});
+
+			if (res) {
+				const blob = await res.blob();
+				const blobUrl = URL.createObjectURL(blob);
+				const audio = new Audio(blobUrl);
+				assistantSentenceAudios[idx] = audio;
+
+				console.log('%c%s', 'color: red; font-size: 20px;', content);
+
+				audioQueue.push(audio);
+				audioQueue = audioQueue;
+			}
+		}
+	};

-	const stopRecordingCallback = async () => {
+	const stopRecordingCallback = async (_continue = true) => {
 		if ($showCallOverlay) {
+			console.log('%c%s', 'color: red; font-size: 20px;', '🚨 stopRecordingCallback 🚨');
+
+			// deep copy the audioChunks array
+			const _audioChunks = audioChunks.slice(0);
+
+			audioChunks = [];
+			mediaRecorder = false;
+
+			if (_continue) {
+				startRecording();
+			}
+
 			if (confirmed) {
 				loading = true;
+				emoji = null;

 				if (cameraStream) {
 					const imageUrl = takeScreenshot();

@@ -301,16 +457,12 @@
 				];
 			}

-			const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
+			const audioBlob = new Blob(_audioChunks, { type: 'audio/wav' });
 			await transcribeHandler(audioBlob);

 			confirmed = false;
 			loading = false;
-
-			audioChunks = [];
-			mediaRecorder = false;
-
-			startRecording();
 		} else {
 			audioChunks = [];
 			mediaRecorder = false;

@@ -332,129 +484,120 @@
 		};

 		mediaRecorder.onstop = async () => {
 			console.log('Recording stopped');
 			await stopRecordingCallback();
 		};

 		mediaRecorder.start();
 	};

-	let videoInputDevices = [];
-	let selectedVideoInputDeviceId = null;
-
-	const getVideoInputDevices = async () => {
-		const devices = await navigator.mediaDevices.enumerateDevices();
-		videoInputDevices = devices.filter((device) => device.kind === 'videoinput');
-
-		if (!!navigator.mediaDevices.getDisplayMedia) {
-			videoInputDevices = [
-				...videoInputDevices,
-				{
-					deviceId: 'screen',
-					label: 'Screen Share'
-				}
-			];
-		}
-
-		console.log(videoInputDevices);
-		if (selectedVideoInputDeviceId === null && videoInputDevices.length > 0) {
-			selectedVideoInputDeviceId = videoInputDevices[0].deviceId;
-		}
-	};
-
-	const startCamera = async () => {
-		await getVideoInputDevices();
-
-		if (cameraStream === null) {
-			camera = true;
-			await tick();
-			try {
-				await startVideoStream();
-			} catch (err) {
-				console.error('Error accessing webcam: ', err);
-			}
-		}
-	};
-
-	const startVideoStream = async () => {
-		const video = document.getElementById('camera-feed');
-		if (video) {
-			if (selectedVideoInputDeviceId === 'screen') {
-				cameraStream = await navigator.mediaDevices.getDisplayMedia({
-					video: {
-						cursor: 'always'
-					},
-					audio: false
-				});
-			} else {
-				cameraStream = await navigator.mediaDevices.getUserMedia({
-					video: {
-						deviceId: selectedVideoInputDeviceId ? { exact: selectedVideoInputDeviceId } : undefined
-					}
-				});
-			}
-
-			if (cameraStream) {
-				await getVideoInputDevices();
-				video.srcObject = cameraStream;
-				await video.play();
-			}
-		}
-	};
-
-	const stopVideoStream = async () => {
-		if (cameraStream) {
-			const tracks = cameraStream.getTracks();
-			tracks.forEach((track) => track.stop());
-		}
-		cameraStream = null;
-	};
-
-	const takeScreenshot = () => {
-		const video = document.getElementById('camera-feed');
-		const canvas = document.getElementById('camera-canvas');
-
-		if (!canvas) {
-			return;
-		}
-
-		const context = canvas.getContext('2d');
-
-		// Make the canvas match the video dimensions
-		canvas.width = video.videoWidth;
-		canvas.height = video.videoHeight;
-
-		// Draw the image from the video onto the canvas
-		context.drawImage(video, 0, 0, video.videoWidth, video.videoHeight);
-
-		// Convert the canvas to a data base64 URL and console log it
-		const dataURL = canvas.toDataURL('image/png');
-		console.log(dataURL);
-
-		return dataURL;
-	};
-
-	const stopCamera = async () => {
-		await stopVideoStream();
-		camera = false;
-	};
-
-	$: if ($showCallOverlay) {
-		startRecording();
-	} else {
-		stopCamera();
-	}
+	const resetAssistantMessage = async () => {
+		interrupted = false;
+
+		assistantMessage = '';
+		assistantSentenceIdx = -1;
+		assistantSentenceAudios = {}; // Reset audio tracking
+		audioQueue = []; // Clear the audio queue
+		audioQueue = audioQueue;
+
+		emoji = null;
+		emojiQueue = [];
+		emojiQueue = emojiQueue;
+	};
+
+	$: (async () => {
+		if ($showCallOverlay) {
+			await resetAssistantMessage();
+			await tick();
+			startRecording();
+		} else {
+			stopCamera();
+			stopAllAudio();
+			stopRecordingCallback(false);
+		}
+	})();
+
+	$: {
+		if (audioQueue.length > 0 && !assistantSpeaking) {
+			playAudioHandler();
+		}
+	}
+
+	onMount(() => {
+		eventTarget.addEventListener('chat:start', async (e) => {
+			if ($showCallOverlay) {
+				console.log('Chat start event:', e);
+				await resetAssistantMessage();
+				await tick();
+				chatStreaming = true;
+			}
+		});
+
+		eventTarget.addEventListener('chat', async (e) => {
+			if ($showCallOverlay) {
+				const { content } = e.detail;
+				assistantMessage += content;
+				await tick();
+
+				if (!interrupted) {
+					if ($config.audio.tts.engine !== '') {
+						assistantSentenceIdx = assistantSentences.length - 2;
+
+						if (assistantSentenceIdx >= 0 && !assistantSentenceAudios[assistantSentenceIdx]) {
+							await tick();
+							setContentAudio(assistantSentences[assistantSentenceIdx], assistantSentenceIdx);
+						}
+					}
+				}
+
+				chatStreaming = true;
+			}
+		});
+
+		eventTarget.addEventListener('chat:finish', async (e) => {
+			if ($showCallOverlay) {
+				chatStreaming = false;
+				loading = false;
+
+				console.log('Chat finish event:', e);
+				await tick();
+
+				if (!interrupted) {
+					if ($config.audio.tts.engine !== '') {
+						for (const [idx, sentence] of assistantSentences.entries()) {
+							if (!assistantSentenceAudios[idx]) {
+								await tick();
+								setContentAudio(sentence, idx);
+							}
+						}
+					} else {
+						if ($settings?.showEmojiInCall ?? false) {
+							const res = await generateEmoji(localStorage.token, modelId, assistantMessage);
+
+							if (res) {
+								console.log(res);
+								if (/\p{Extended_Pictographic}/u.test(res)) {
+									emoji = res.match(/\p{Extended_Pictographic}/gu)[0];
+								}
+							}
+						}
+
+						speakSpeechSynthesisHandler(assistantMessage);
+					}
+				}
+			}
+		});
+	});
 </script>

+<audio id="audioElement" src="" style="display: none;" />
+
 {#if $showCallOverlay}
-	<audio id="audioElement" src="" style="display: none;" />
 	<div class=" absolute w-full h-screen max-h-[100dvh] flex z-[999] overflow-hidden">
 		<div
 			class="absolute w-full h-screen max-h-[100dvh] bg-white text-gray-700 dark:bg-black dark:text-gray-300 flex justify-center"
 		>
 			<div class="max-w-lg w-full h-screen max-h-[100dvh] flex flex-col justify-between p-3 md:p-6">
 				{#if camera}
-					<div class="flex justify-center items-center w-full min-h-20">
+					<div class="flex justify-center items-center w-full h-20 min-h-20">
 						{#if loading}
 							<svg
 								class="size-12 text-gray-900 dark:text-gray-400"

@@ -492,6 +635,19 @@
 								r="3"
 							/><circle class="spinner_qM83 spinner_ZTLf" cx="20" cy="12" r="3" /></svg
 						>
+					{:else if emoji}
+						<div
+							class=" transition-all rounded-full"
+							style="font-size:{rmsLevel * 100 > 4
+								? '4.5'
+								: rmsLevel * 100 > 2
+									? '4.25'
+									: rmsLevel * 100 > 1
+										? '3.75'
+										: '3.5'}rem;width: 100%; text-align:center;"
+						>
+							{emoji}
+						</div>
 					{:else}
 						<div
 							class=" {rmsLevel * 100 > 4

@@ -546,6 +702,19 @@
 								r="3"
 							/><circle class="spinner_qM83 spinner_ZTLf" cx="20" cy="12" r="3" /></svg
 						>
+					{:else if emoji}
+						<div
+							class=" transition-all rounded-full"
+							style="font-size:{rmsLevel * 100 > 4
+								? '13'
+								: rmsLevel * 100 > 2
+									? '12'
+									: rmsLevel * 100 > 1
+										? '11.5'
+										: '11'}rem;width:100%;text-align:center;"
+						>
+							{emoji}
+						</div>
 					{:else}
 						<div
 							class=" {rmsLevel * 100 > 4
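Both the reactive assistantSentences statement and the old assistantSpeakingHandler run sentences through the same reducer before synthesis, so the audio queue never receives a one-word clip. Extracted on its own it looks like this; mergeShortSentences is a name given here to the anonymous reducer, not an identifier from the diff:

// Standalone sketch of the sentence-merging reducer used for per-sentence TTS:
// a fragment shorter than two words is folded into the sentence that follows it.
const mergeShortSentences = (sentences: string[]): string[] =>
	sentences.reduce<string[]>((mergedTexts, currentText) => {
		const lastIndex = mergedTexts.length - 1;
		if (lastIndex >= 0 && mergedTexts[lastIndex].split(/\s+/).length < 2) {
			mergedTexts[lastIndex] = mergedTexts[lastIndex] + ' ' + currentText;
		} else {
			mergedTexts.push(currentText);
		}
		return mergedTexts;
	}, []);

console.log(mergeShortSentences(['Wow!', 'That is great news.']));
// -> ['Wow! That is great news.']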
src/lib/components/chat/Messages.svelte

@@ -79,7 +79,7 @@
 		history.currentId = userMessageId;

 		await tick();
-		await sendPrompt(userPrompt, userMessageId, undefined, false);
+		await sendPrompt(userPrompt, userMessageId);
 	};

 	const updateChatMessages = async () => {
src/lib/components/chat/Settings/Interface.svelte

@@ -20,9 +20,12 @@
 	// Interface
 	let defaultModelId = '';
 	let showUsername = false;

 	let chatBubble = true;
 	let chatDirection: 'LTR' | 'RTL' = 'LTR';

+	let showEmojiInCall = false;
+
 	const toggleSplitLargeChunks = async () => {
 		splitLargeChunks = !splitLargeChunks;
 		saveSettings({ splitLargeChunks: splitLargeChunks });

@@ -43,6 +46,11 @@
 		saveSettings({ showUsername: showUsername });
 	};

+	const toggleEmojiInCall = async () => {
+		showEmojiInCall = !showEmojiInCall;
+		saveSettings({ showEmojiInCall: showEmojiInCall });
+	};
+
 	const toggleTitleAutoGenerate = async () => {
 		titleAutoGenerate = !titleAutoGenerate;
 		saveSettings({

@@ -88,8 +96,12 @@
 	onMount(async () => {
 		titleAutoGenerate = $settings?.title?.auto ?? true;
 		responseAutoCopy = $settings.responseAutoCopy ?? false;

 		showUsername = $settings.showUsername ?? false;
+		showEmojiInCall = $settings.showEmojiInCall ?? false;
+
 		chatBubble = $settings.chatBubble ?? true;
 		widescreenMode = $settings.widescreenMode ?? false;
 		splitLargeChunks = $settings.splitLargeChunks ?? false;

@@ -192,6 +204,26 @@
 				</div>
 			</div>

+			<div>
+				<div class=" py-0.5 flex w-full justify-between">
+					<div class=" self-center text-xs font-medium">{$i18n.t('Display Emoji in Call')}</div>
+
+					<button
+						class="p-1 px-3 text-xs flex rounded transition"
+						on:click={() => {
+							toggleEmojiInCall();
+						}}
+						type="button"
+					>
+						{#if showEmojiInCall === true}
+							<span class="ml-2 self-center">{$i18n.t('On')}</span>
+						{:else}
+							<span class="ml-2 self-center">{$i18n.t('Off')}</span>
+						{/if}
+					</button>
+				</div>
+			</div>
+
 			{#if !$settings.chatBubble}
 				<div>
 					<div class=" py-0.5 flex w-full justify-between">
src/lib/utils/index.ts

@@ -436,7 +436,7 @@ export const removeEmojis = (str) => {
 export const extractSentences = (text) => {
 	// Split the paragraph into sentences based on common punctuation marks
-	const sentences = text.split(/(?<=[.!?])/);
+	const sentences = text.split(/(?<=[.!?])\s+/);

 	return sentences
 		.map((sentence) => removeEmojis(sentence.trim()))
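The added \s+ means a split now only happens when the punctuation is actually followed by whitespace, so decimals and other mid-token periods stay intact instead of producing broken TTS fragments. A quick comparison; the outputs below were checked by hand, not generated from the repo:

const text = 'Version 4.5 is out! Try it today.';

text.split(/(?<=[.!?])/);
// -> ['Version 4.', '5 is out!', ' Try it today.']   (old: breaks inside "4.5")

text.split(/(?<=[.!?])\s+/);
// -> ['Version 4.5 is out!', 'Try it today.']        (new: splits only at sentence gaps)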