Commit 5300d2c5 authored by Timothy J. Baek

refac

parent d6fd2a82
@@ -887,7 +887,7 @@ async def generate_emoji(form_data: dict, user=Depends(get_verified_user)):
model = app.state.MODELS[model_id]
template = '''
-You are a perceptive assistant skilled at interpreting emotions from a provided message. Your task is to reflect the speaker's likely facial expression through a fitting emoji. Prioritize using diverse facial expression emojis to convey the nuanced emotions expressed in the text. Please avoid using generic or overly ambiguous emojis like "🤔", and instead, choose ones that vividly represent the speaker's mood or reaction.
+You are a perceptive assistant skilled at interpreting emotions from a provided message. Your task is to reflect the speaker's likely facial expression through a fitting emoji. Prioritize using diverse facial expression emojis to convey the nuanced emotions expressed in the text. Please choose ones that vividly represent the speaker's mood or reaction.
Message: """{{prompt}}""" Message: """{{prompt}}"""
''' '''
......
@@ -1209,6 +1209,7 @@
<CallOverlay
{submitPrompt}
{stopResponse}
bind:files
modelId={selectedModelIds?.at(0) ?? null}
chatId={$chatId}
......
@@ -14,16 +14,18 @@
const i18n = getContext('i18n');
export let eventTarget: EventTarget;
export let submitPrompt: Function;
export let stopResponse: Function;
export let files;
export let chatId;
export let modelId;
let message = '';
let loading = false;
let confirmed = false;
let interrupted = false;
let emoji = null;
@@ -31,17 +33,141 @@
let cameraStream = null;
let assistantSpeaking = false;
-let assistantAudio = {};
-let assistantAudioIdx = null;
-let rmsLevel = 0;
-let hasStartedSpeaking = false;
+let chatStreaming = false;
+let assistantMessage = '';
let assistantSentences = [];
let assistantSentenceAudios = {};
let assistantSentenceIdx = -1;
let audioQueue = [];
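// Reactive: split the streamed assistant message into sentences, merging any one-word
// fragment into the previous entry so TTS is never requested for a tiny snippet
// (illustrative example: ["Hi.", "there."] collapses into ["Hi. there."]).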
$: assistantSentences = extractSentences(assistantMessage).reduce((mergedTexts, currentText) => {
const lastIndex = mergedTexts.length - 1;
if (lastIndex >= 0) {
const previousText = mergedTexts[lastIndex];
const wordCount = previousText.split(/\s+/).length;
if (wordCount < 2) {
mergedTexts[lastIndex] = previousText + ' ' + currentText;
} else {
mergedTexts.push(currentText);
}
} else {
mergedTexts.push(currentText);
}
return mergedTexts;
}, []);
let currentUtterance = null;
let rmsLevel = 0;
let hasStartedSpeaking = false;
let mediaRecorder;
let audioChunks = [];
$: console.log('hasStartedSpeaking', hasStartedSpeaking);
let videoInputDevices = [];
let selectedVideoInputDeviceId = null;
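// Enumerate the available cameras; when getDisplayMedia is supported, a synthetic
// 'screen' entry is appended so screen sharing can be selected like any other device.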
const getVideoInputDevices = async () => {
const devices = await navigator.mediaDevices.enumerateDevices();
videoInputDevices = devices.filter((device) => device.kind === 'videoinput');
if (!!navigator.mediaDevices.getDisplayMedia) {
videoInputDevices = [
...videoInputDevices,
{
deviceId: 'screen',
label: 'Screen Share'
}
];
}
console.log(videoInputDevices);
if (selectedVideoInputDeviceId === null && videoInputDevices.length > 0) {
selectedVideoInputDeviceId = videoInputDevices[0].deviceId;
}
};
const startCamera = async () => {
await getVideoInputDevices();
if (cameraStream === null) {
camera = true;
await tick();
try {
await startVideoStream();
} catch (err) {
console.error('Error accessing webcam: ', err);
}
}
};
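// Attach the chosen source to the <video> element: a display-capture stream when
// 'screen' is selected, otherwise the selected webcam via getUserMedia.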
const startVideoStream = async () => {
const video = document.getElementById('camera-feed');
if (video) {
if (selectedVideoInputDeviceId === 'screen') {
cameraStream = await navigator.mediaDevices.getDisplayMedia({
video: {
cursor: 'always'
},
audio: false
});
} else {
cameraStream = await navigator.mediaDevices.getUserMedia({
video: {
deviceId: selectedVideoInputDeviceId ? { exact: selectedVideoInputDeviceId } : undefined
}
});
}
if (cameraStream) {
await getVideoInputDevices();
video.srcObject = cameraStream;
await video.play();
}
}
};
const stopVideoStream = async () => {
if (cameraStream) {
const tracks = cameraStream.getTracks();
tracks.forEach((track) => track.stop());
}
cameraStream = null;
};
const takeScreenshot = () => {
const video = document.getElementById('camera-feed');
const canvas = document.getElementById('camera-canvas');
if (!canvas) {
return;
}
const context = canvas.getContext('2d');
// Make the canvas match the video dimensions
canvas.width = video.videoWidth;
canvas.height = video.videoHeight;
// Draw the image from the video onto the canvas
context.drawImage(video, 0, 0, video.videoWidth, video.videoHeight);
// Convert the canvas to a data base64 URL and console log it
const dataURL = canvas.toDataURL('image/png');
console.log(dataURL);
return dataURL;
};
const stopCamera = async () => {
await stopVideoStream();
camera = false;
};
const MIN_DECIBELS = -45;
const VISUALIZER_BUFFER_LENGTH = 300;
@@ -55,15 +181,6 @@
return Math.sqrt(sumSquares / data.length);
};
const normalizeRMS = (rms) => {
rms = rms * 10;
const exp = 1.5; // Adjust exponent value; values greater than 1 expand larger numbers more and compress smaller numbers more
const scaledRMS = Math.pow(rms, exp);
// Scale between 0.01 (1%) and 1.0 (100%)
return Math.min(1.0, Math.max(0.01, scaledRMS));
};
const analyseAudio = (stream) => {
const audioContext = new AudioContext();
const audioStreamSource = audioContext.createMediaStreamSource(stream);
@@ -83,12 +200,9 @@
const detectSound = () => {
const processFrame = () => {
if (!mediaRecorder || !$showCallOverlay) {
if (mediaRecorder) {
mediaRecorder.stop();
}
return;
}
analyser.getByteTimeDomainData(timeDomainData);
analyser.getByteFrequencyData(domainData);
@@ -98,9 +212,12 @@
// Check if initial speech/noise has started
const hasSound = domainData.some((value) => value > 0);
if (hasSound) {
-stopAllAudio();
hasStartedSpeaking = true;
lastSoundTime = Date.now();
+// BIG RED TEXT
+console.log('%c%s', 'color: red; font-size: 20px;', '🔊 Sound detected');
+stopAllAudio();
}
// Start silence detection only after initial speech/noise has been detected
@@ -123,35 +240,94 @@
detectSound();
};
-const stopAllAudio = () => {
+const transcribeHandler = async (audioBlob) => {
+// Create a blob from the audio chunks
+await tick();
+const file = blobToFile(audioBlob, 'recording.wav');
+const res = await transcribeAudio(localStorage.token, file).catch((error) => {
+toast.error(error);
+return null;
+});
+if (res) {
+console.log(res.text);
+if (res.text !== '') {
+const _responses = await submitPrompt(res.text, { _raw: true });
+console.log(_responses);
+}
+}
+};
+const stopAllAudio = async () => {
+interrupted = true;
+if (chatStreaming) {
+stopResponse();
+}
if (currentUtterance) {
speechSynthesis.cancel();
currentUtterance = null;
}
-if (assistantAudio[assistantAudioIdx]) {
-assistantAudio[assistantAudioIdx].pause();
-assistantAudio[assistantAudioIdx].currentTime = 0;
-}
-const audioElement = document.getElementById('audioElement');
+await tick();
+audioQueue = [];
+await tick();
+const audioElement = document.getElementById('audioElement');
if (audioElement) {
audioElement.pause();
audioElement.currentTime = 0;
}
assistantSpeaking = false;
};
-const playAudio = (idx) => {
+const speakSpeechSynthesisHandler = (content) => {
+if ($showCallOverlay) {
+return new Promise((resolve) => {
+let voices = [];
+const getVoicesLoop = setInterval(async () => {
+voices = await speechSynthesis.getVoices();
+if (voices.length > 0) {
+clearInterval(getVoicesLoop);
+const voice =
+voices
+?.filter(
+(v) => v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
+)
+?.at(0) ?? undefined;
+currentUtterance = new SpeechSynthesisUtterance(content);
+if (voice) {
+currentUtterance.voice = voice;
+}
+speechSynthesis.speak(currentUtterance);
+currentUtterance.onend = async (e) => {
+await new Promise((r) => setTimeout(r, 100));
+resolve(e);
+};
+}
+}, 100);
+});
+} else {
+return Promise.resolve();
+}
+};
+const playAudio = (audio) => {
if ($showCallOverlay) {
-return new Promise((res) => {
-assistantAudioIdx = idx;
+return new Promise((resolve) => {
const audioElement = document.getElementById('audioElement');
-const audio = assistantAudio[idx];
if (audioElement) {
-audioElement.src = audio.src; // Assume `assistantAudio` has objects with a `src` property
+audioElement.src = audio.src;
audioElement.muted = true;
audioElement
@@ -160,17 +336,12 @@
audioElement.muted = false;
})
.catch((error) => {
-toast.error(error);
+console.error(error);
});
audioElement.onended = async (e) => {
-await new Promise((r) => setTimeout(r, 300));
-if (Object.keys(assistantAudio).length - 1 === idx) {
-assistantSpeaking = false;
-}
-res(e);
+await new Promise((r) => setTimeout(r, 100));
+resolve(e);
};
}
});
@@ -179,147 +350,57 @@
}
};
-const getOpenAISpeech = async (text) => {
-const res = await synthesizeOpenAISpeech(
-localStorage.token,
-$settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice,
-text
-).catch((error) => {
-toast.error(error);
-assistantSpeaking = false;
-return null;
-});
-if (res) {
-const blob = await res.blob();
-const blobUrl = URL.createObjectURL(blob);
-const audio = new Audio(blobUrl);
-assistantAudio = audio;
-}
-};
-const transcribeHandler = async (audioBlob) => {
-// Create a blob from the audio chunks
-await tick();
-const file = blobToFile(audioBlob, 'recording.wav');
-const res = await transcribeAudio(localStorage.token, file).catch((error) => {
-toast.error(error);
-return null;
-});
-if (res) {
-console.log(res.text);
-if (res.text !== '') {
-const _responses = await submitPrompt(res.text, { _raw: true });
-console.log(_responses);
-}
-}
-};
-const assistantSpeakingHandler = async (content) => {
-assistantSpeaking = true;
-if (modelId && ($settings?.showEmojiInCall ?? false)) {
-console.log('Generating emoji');
-const res = await generateEmoji(localStorage.token, modelId, content, chatId).catch(
-(error) => {
-console.error(error);
-return null;
-}
-);
-if (res) {
-console.log(res);
-if (/\p{Extended_Pictographic}/u.test(res)) {
-emoji = res.match(/\p{Extended_Pictographic}/gu)[0];
-}
-}
-}
-if (($config.audio.tts.engine ?? '') == '') {
-let voices = [];
-const getVoicesLoop = setInterval(async () => {
-voices = await speechSynthesis.getVoices();
-if (voices.length > 0) {
-clearInterval(getVoicesLoop);
-const voice =
-voices
-?.filter(
-(v) => v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
-)
-?.at(0) ?? undefined;
-currentUtterance = new SpeechSynthesisUtterance(content);
-if (voice) {
-currentUtterance.voice = voice;
-}
-speechSynthesis.speak(currentUtterance);
-currentUtterance.onend = async () => {
-assistantSpeaking = false;
-};
-}
-}, 100);
-} else if ($config.audio.tts.engine === 'openai') {
-console.log('openai');
-const sentences = extractSentences(content).reduce((mergedTexts, currentText) => {
-const lastIndex = mergedTexts.length - 1;
-if (lastIndex >= 0) {
-const previousText = mergedTexts[lastIndex];
-const wordCount = previousText.split(/\s+/).length;
-if (wordCount < 2) {
-mergedTexts[lastIndex] = previousText + ' ' + currentText;
-} else {
-mergedTexts.push(currentText);
-}
-} else {
-mergedTexts.push(currentText);
-}
-return mergedTexts;
-}, []);
-console.log(sentences);
-let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately
-for (const [idx, sentence] of sentences.entries()) {
-const res = await synthesizeOpenAISpeech(
-localStorage.token,
-$settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice,
-sentence
-).catch((error) => {
-toast.error(error);
-assistantSpeaking = false;
-return null;
-});
-if (res) {
-const blob = await res.blob();
-const blobUrl = URL.createObjectURL(blob);
-const audio = new Audio(blobUrl);
-assistantAudio[idx] = audio;
-lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
-if (idx === sentences.length - 1) {
-lastPlayedAudioPromise.then(() => {
-assistantSpeaking = false;
-});
-}
-}
-}
-}
-};
-const stopRecordingCallback = async (_continue = true) => {
-if ($showCallOverlay) {
+const playAudioHandler = async () => {
+console.log('playAudioHandler', audioQueue, assistantSpeaking, audioQueue.length > 0);
+if (!assistantSpeaking && !interrupted && audioQueue.length > 0) {
+assistantSpeaking = true;
+const audioToPlay = audioQueue.shift(); // Shift the audio out from queue before playing.
+audioQueue = audioQueue;
+await playAudio(audioToPlay);
+assistantSpeaking = false;
+}
+};
+const setContentAudio = async (content, idx) => {
+if (assistantSentenceAudios[idx] === undefined) {
+console.log('%c%s', 'color: red; font-size: 20px;', content);
+assistantSentenceAudios[idx] = null;
+const res = await synthesizeOpenAISpeech(
+localStorage.token,
+$settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice,
+content
+).catch((error) => {
+toast.error(error);
+assistantSpeaking = false;
+return null;
+});
+if (res) {
+const blob = await res.blob();
+const blobUrl = URL.createObjectURL(blob);
+const audio = new Audio(blobUrl);
+assistantSentenceAudios[idx] = audio;
+audioQueue.push(audio);
+audioQueue = audioQueue;
+}
+}
+};
+const stopRecordingCallback = async (_continue = true) => {
+console.log('%c%s', 'color: red; font-size: 20px;', '🚨 stopRecordingCallback 🚨');
+if ($showCallOverlay) {
+// deep copy the audioChunks array
+const _audioChunks = audioChunks.slice(0);
+audioChunks = [];
+mediaRecorder = false;
+if (_continue) {
+startRecording();
+}
if (confirmed) {
loading = true;
emoji = null;
@@ -335,18 +416,12 @@
];
}
-const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
+const audioBlob = new Blob(_audioChunks, { type: 'audio/wav' });
await transcribeHandler(audioBlob);
confirmed = false;
loading = false;
}
-audioChunks = [];
-mediaRecorder = false;
-if (_continue) {
-startRecording();
-}
} else {
audioChunks = [];
mediaRecorder = false;
@@ -368,113 +443,11 @@
};
mediaRecorder.onstop = async () => {
console.log('Recording stopped');
await stopRecordingCallback();
};
mediaRecorder.start();
};
let videoInputDevices = [];
let selectedVideoInputDeviceId = null;
const getVideoInputDevices = async () => {
const devices = await navigator.mediaDevices.enumerateDevices();
videoInputDevices = devices.filter((device) => device.kind === 'videoinput');
if (!!navigator.mediaDevices.getDisplayMedia) {
videoInputDevices = [
...videoInputDevices,
{
deviceId: 'screen',
label: 'Screen Share'
}
];
}
console.log(videoInputDevices);
if (selectedVideoInputDeviceId === null && videoInputDevices.length > 0) {
selectedVideoInputDeviceId = videoInputDevices[0].deviceId;
}
};
const startCamera = async () => {
await getVideoInputDevices();
if (cameraStream === null) {
camera = true;
await tick();
try {
await startVideoStream();
} catch (err) {
console.error('Error accessing webcam: ', err);
}
}
};
const startVideoStream = async () => {
const video = document.getElementById('camera-feed');
if (video) {
if (selectedVideoInputDeviceId === 'screen') {
cameraStream = await navigator.mediaDevices.getDisplayMedia({
video: {
cursor: 'always'
},
audio: false
});
} else {
cameraStream = await navigator.mediaDevices.getUserMedia({
video: {
deviceId: selectedVideoInputDeviceId ? { exact: selectedVideoInputDeviceId } : undefined
}
});
}
if (cameraStream) {
await getVideoInputDevices();
video.srcObject = cameraStream;
await video.play();
}
}
};
const stopVideoStream = async () => {
if (cameraStream) {
const tracks = cameraStream.getTracks();
tracks.forEach((track) => track.stop());
}
cameraStream = null;
};
const takeScreenshot = () => {
const video = document.getElementById('camera-feed');
const canvas = document.getElementById('camera-canvas');
if (!canvas) {
return;
}
const context = canvas.getContext('2d');
// Make the canvas match the video dimensions
canvas.width = video.videoWidth;
canvas.height = video.videoHeight;
// Draw the image from the video onto the canvas
context.drawImage(video, 0, 0, video.videoWidth, video.videoHeight);
// Convert the canvas to a data base64 URL and console log it
const dataURL = canvas.toDataURL('image/png');
console.log(dataURL);
return dataURL;
};
const stopCamera = async () => {
await stopVideoStream();
camera = false;
};
$: if ($showCallOverlay) {
startRecording();
} else {
@@ -483,30 +456,73 @@
stopRecordingCallback(false);
}
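// Drain the TTS queue whenever new audio arrives and nothing is currently playing.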
$: {
if (audioQueue.length > 0 && !assistantSpeaking) {
playAudioHandler();
}
}
onMount(() => {
console.log(eventTarget);
eventTarget.addEventListener('chat:start', async (e) => {
-console.log('Chat start event:', e.detail);
-message = '';
+console.log('Chat start event:', e);
+interrupted = false;
assistantMessage = '';
assistantSentenceIdx = -1;
assistantSentenceAudios = {}; // Reset audio tracking
audioQueue = []; // Clear the audio queue
chatStreaming = true;
});
eventTarget.addEventListener('chat', async (e) => {
const { content } = e.detail;
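// Append the streamed chunk and, when an external TTS engine is configured, eagerly
// synthesize each sentence as soon as it is complete (all but the last, possibly partial, one).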
assistantMessage += content;
await tick();
if (!interrupted) {
if ($config.audio.tts.engine !== '') {
assistantSentenceIdx = assistantSentences.length - 2;
if (assistantSentenceIdx >= 0 && !assistantSentenceAudios[assistantSentenceIdx]) {
await tick();
setContentAudio(assistantSentences[assistantSentenceIdx], assistantSentenceIdx);
}
}
}
-message += content;
-console.log('Chat event:', message);
+chatStreaming = true;
}); });
eventTarget.addEventListener('chat:finish', async (e) => {
-console.log('Chat finish event:', e.detail);
-message = '';
+chatStreaming = false;
+loading = false;
console.log('Chat finish event:', e);
await tick();
if (!interrupted) {
if ($config.audio.tts.engine !== '') {
for (const [idx, sentence] of assistantSentences.entries()) {
if (!assistantSentenceAudios[idx]) {
await tick();
setContentAudio(sentence, idx);
}
}
} else {
emoji = generateEmoji(localStorage.token, modelId, assistantMessage);
speakSpeechSynthesisHandler(assistantMessage);
}
}
});
});
</script>
<audio id="audioElement" src="" style="display: none;" />
{#if $showCallOverlay} {#if $showCallOverlay}
<audio id="audioElement" src="" style="display: none;" />
<div class=" absolute w-full h-screen max-h-[100dvh] flex z-[999] overflow-hidden"> <div class=" absolute w-full h-screen max-h-[100dvh] flex z-[999] overflow-hidden">
<div <div
class="absolute w-full h-screen max-h-[100dvh] bg-white text-gray-700 dark:bg-black dark:text-gray-300 flex justify-center" class="absolute w-full h-screen max-h-[100dvh] bg-white text-gray-700 dark:bg-black dark:text-gray-300 flex justify-center"
......