Commit 5300d2c5 authored by Timothy J. Baek

refac

parent d6fd2a82
@@ -887,7 +887,7 @@ async def generate_emoji(form_data: dict, user=Depends(get_verified_user)):
     model = app.state.MODELS[model_id]

     template = '''
-You are a perceptive assistant skilled at interpreting emotions from a provided message. Your task is to reflect the speaker's likely facial expression through a fitting emoji. Prioritize using diverse facial expression emojis to convey the nuanced emotions expressed in the text. Please avoid using generic or overly ambiguous emojis like "🤔", and instead, choose ones that vividly represent the speaker's mood or reaction.
+You are a perceptive assistant skilled at interpreting emotions from a provided message. Your task is to reflect the speaker's likely facial expression through a fitting emoji. Prioritize using diverse facial expression emojis to convey the nuanced emotions expressed in the text. Please choose ones that vividly represent the speaker's mood or reaction.

 Message: """{{prompt}}"""
 '''
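The backend change above only relaxes the prompt: the instruction to avoid ambiguous emojis such as "🤔" is dropped. For orientation, here is a minimal sketch of how the `{{prompt}}` placeholder in this template might be filled in; the helper name and substitution logic are assumptions for illustration, not the project's actual templating code.

```ts
// Hypothetical stand-in for the backend's template substitution.
const template = `
You are a perceptive assistant skilled at interpreting emotions from a provided message.
Message: """{{prompt}}"""
`;

const renderEmojiTemplate = (tpl: string, prompt: string): string =>
    tpl.replace('{{prompt}}', prompt);

console.log(renderEmojiTemplate(template, 'We won the grant!'));
// The model is then expected to answer with a single expressive emoji, e.g. "🤩".
```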
...
@@ -1209,6 +1209,7 @@
 <CallOverlay
     {submitPrompt}
+    {stopResponse}
     bind:files
     modelId={selectedModelIds?.at(0) ?? null}
     chatId={$chatId}
...
@@ -14,16 +14,18 @@
     const i18n = getContext('i18n');

     export let eventTarget: EventTarget;
     export let submitPrompt: Function;
+    export let stopResponse: Function;
     export let files;
     export let chatId;
     export let modelId;

-    let message = '';
     let loading = false;
     let confirmed = false;
+    let interrupted = false;
     let emoji = null;
@@ -31,17 +33,141 @@
     let cameraStream = null;
     let assistantSpeaking = false;
-    let assistantAudio = {};
-    let assistantAudioIdx = null;
-    let rmsLevel = 0;
-    let hasStartedSpeaking = false;
+    let chatStreaming = false;
+
+    let assistantMessage = '';
+    let assistantSentences = [];
+    let assistantSentenceAudios = {};
+    let assistantSentenceIdx = -1;
+
+    let audioQueue = [];
+
+    $: assistantSentences = extractSentences(assistantMessage).reduce((mergedTexts, currentText) => {
+        const lastIndex = mergedTexts.length - 1;
+        if (lastIndex >= 0) {
+            const previousText = mergedTexts[lastIndex];
+            const wordCount = previousText.split(/\s+/).length;
+            if (wordCount < 2) {
+                mergedTexts[lastIndex] = previousText + ' ' + currentText;
+            } else {
+                mergedTexts.push(currentText);
+            }
+        } else {
+            mergedTexts.push(currentText);
+        }
+        return mergedTexts;
+    }, []);
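The reactive statement above re-derives `assistantSentences` from the streamed `assistantMessage`: it splits the text into sentences and merges any fragment shorter than two words into its predecessor, so the TTS engine is never asked to synthesize a one-word "sentence". A self-contained sketch of the same reducer; `extractSentences` here is a naive stand-in for the component's imported helper, whose exact splitting rules may differ.

```ts
// Naive stand-in for the imported extractSentences helper (assumed to split
// on sentence-final punctuation).
const extractSentences = (text: string): string[] =>
    text.match(/[^.!?]+[.!?]+/g)?.map((s) => s.trim()) ?? [];

const mergeShortSentences = (text: string): string[] =>
    extractSentences(text).reduce<string[]>((merged, current) => {
        const last = merged.length - 1;
        // Fragments of fewer than two words are glued onto the previous sentence.
        if (last >= 0 && merged[last].split(/\s+/).length < 2) {
            merged[last] = merged[last] + ' ' + current;
        } else {
            merged.push(current);
        }
        return merged;
    }, []);

console.log(mergeShortSentences('Yes. That should work fine. Great!'));
// -> [ 'Yes. That should work fine.', 'Great!' ]
```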
     let currentUtterance = null;
+    let rmsLevel = 0;
+    let hasStartedSpeaking = false;

     let mediaRecorder;
     let audioChunks = [];
+    $: console.log('hasStartedSpeaking', hasStartedSpeaking);

+    let videoInputDevices = [];
+    let selectedVideoInputDeviceId = null;

+    const getVideoInputDevices = async () => {
+        const devices = await navigator.mediaDevices.enumerateDevices();
+        videoInputDevices = devices.filter((device) => device.kind === 'videoinput');
+
+        if (!!navigator.mediaDevices.getDisplayMedia) {
+            videoInputDevices = [
+                ...videoInputDevices,
+                {
+                    deviceId: 'screen',
+                    label: 'Screen Share'
+                }
+            ];
+        }
+
+        console.log(videoInputDevices);
+
+        if (selectedVideoInputDeviceId === null && videoInputDevices.length > 0) {
+            selectedVideoInputDeviceId = videoInputDevices[0].deviceId;
+        }
+    };

+    const startCamera = async () => {
+        await getVideoInputDevices();
+
+        if (cameraStream === null) {
+            camera = true;
+            await tick();
+            try {
+                await startVideoStream();
+            } catch (err) {
+                console.error('Error accessing webcam: ', err);
+            }
+        }
+    };

+    const startVideoStream = async () => {
+        const video = document.getElementById('camera-feed');
+        if (video) {
+            if (selectedVideoInputDeviceId === 'screen') {
+                cameraStream = await navigator.mediaDevices.getDisplayMedia({
+                    video: {
+                        cursor: 'always'
+                    },
+                    audio: false
+                });
+            } else {
+                cameraStream = await navigator.mediaDevices.getUserMedia({
+                    video: {
+                        deviceId: selectedVideoInputDeviceId ? { exact: selectedVideoInputDeviceId } : undefined
+                    }
+                });
+            }
+
+            if (cameraStream) {
+                await getVideoInputDevices();
+                video.srcObject = cameraStream;
+                await video.play();
+            }
+        }
+    };

+    const stopVideoStream = async () => {
+        if (cameraStream) {
+            const tracks = cameraStream.getTracks();
+            tracks.forEach((track) => track.stop());
+        }
+
+        cameraStream = null;
+    };

+    const takeScreenshot = () => {
+        const video = document.getElementById('camera-feed');
+        const canvas = document.getElementById('camera-canvas');
+
+        if (!canvas) {
+            return;
+        }
+
+        const context = canvas.getContext('2d');
+
+        // Make the canvas match the video dimensions
+        canvas.width = video.videoWidth;
+        canvas.height = video.videoHeight;
+
+        // Draw the image from the video onto the canvas
+        context.drawImage(video, 0, 0, video.videoWidth, video.videoHeight);
+
+        // Convert the canvas to a data base64 URL and console log it
+        const dataURL = canvas.toDataURL('image/png');
+        console.log(dataURL);
+
+        return dataURL;
+    };

+    const stopCamera = async () => {
+        await stopVideoStream();
+        camera = false;
+    };
     const MIN_DECIBELS = -45;
     const VISUALIZER_BUFFER_LENGTH = 300;
@@ -55,15 +181,6 @@
         return Math.sqrt(sumSquares / data.length);
     };
-    const normalizeRMS = (rms) => {
-        rms = rms * 10;
-        const exp = 1.5; // Adjust exponent value; values greater than 1 expand larger numbers more and compress smaller numbers more
-        const scaledRMS = Math.pow(rms, exp);
-
-        // Scale between 0.01 (1%) and 1.0 (100%)
-        return Math.min(1.0, Math.max(0.01, scaledRMS));
-    };
     const analyseAudio = (stream) => {
         const audioContext = new AudioContext();
         const audioStreamSource = audioContext.createMediaStreamSource(stream);
@@ -83,12 +200,9 @@
         const detectSound = () => {
             const processFrame = () => {
                 if (!mediaRecorder || !$showCallOverlay) {
-                    if (mediaRecorder) {
-                        mediaRecorder.stop();
-                    }
                     return;
                 }

                 analyser.getByteTimeDomainData(timeDomainData);
                 analyser.getByteFrequencyData(domainData);
@@ -98,9 +212,12 @@
                 // Check if initial speech/noise has started
                 const hasSound = domainData.some((value) => value > 0);
                 if (hasSound) {
-                    stopAllAudio();
                     hasStartedSpeaking = true;
                     lastSoundTime = Date.now();
+
+                    // BIG RED TEXT
+                    console.log('%c%s', 'color: red; font-size: 20px;', '🔊 Sound detected');
+                    stopAllAudio();
                 }

                 // Start silence detection only after initial speech/noise has been detected
@@ -123,81 +240,6 @@
         detectSound();
     };
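Two behavioral changes land in this region: `processFrame` no longer stops the recorder when the overlay closes (teardown now happens in `stopRecordingCallback`), and `stopAllAudio()` runs as soon as any sound is detected, so the user can interrupt the assistant by speaking. A rough, self-contained sketch of this style of threshold-based sound detection; the function and callback names are illustrative, and only `MIN_DECIBELS` mirrors the component's constant.

```ts
// Illustrative voice-activity detector in the style of detectSound/processFrame.
const MIN_DECIBELS = -45;

const createSoundDetector = (
    stream: MediaStream,
    onSound: () => void,
    onSilence: (silentMs: number) => void
) => {
    const audioContext = new AudioContext();
    const analyser = audioContext.createAnalyser();
    analyser.minDecibels = MIN_DECIBELS; // bins below this threshold read as zero
    audioContext.createMediaStreamSource(stream).connect(analyser);

    const domainData = new Uint8Array(analyser.frequencyBinCount);
    let lastSoundTime = Date.now();

    const processFrame = () => {
        analyser.getByteFrequencyData(domainData);
        // Any non-zero bin means energy above minDecibels was observed.
        if (domainData.some((value) => value > 0)) {
            lastSoundTime = Date.now();
            onSound();
        } else {
            onSilence(Date.now() - lastSoundTime);
        }
        window.requestAnimationFrame(processFrame);
    };
    window.requestAnimationFrame(processFrame);
};
```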
-    const stopAllAudio = () => {
-        if (currentUtterance) {
-            speechSynthesis.cancel();
-            currentUtterance = null;
-        }
-
-        if (assistantAudio[assistantAudioIdx]) {
-            assistantAudio[assistantAudioIdx].pause();
-            assistantAudio[assistantAudioIdx].currentTime = 0;
-        }
-
-        const audioElement = document.getElementById('audioElement');
-        if (audioElement) {
-            audioElement.pause();
-            audioElement.currentTime = 0;
-        }
-        assistantSpeaking = false;
-    };
-
-    const playAudio = (idx) => {
-        if ($showCallOverlay) {
-            return new Promise((res) => {
-                assistantAudioIdx = idx;
-                const audioElement = document.getElementById('audioElement');
-                const audio = assistantAudio[idx];
-
-                if (audioElement) {
-                    audioElement.src = audio.src; // Assume `assistantAudio` has objects with a `src` property
-                    audioElement.muted = true;
-
-                    audioElement
-                        .play()
-                        .then(() => {
-                            audioElement.muted = false;
-                        })
-                        .catch((error) => {
-                            toast.error(error);
-                        });
-
-                    audioElement.onended = async (e) => {
-                        await new Promise((r) => setTimeout(r, 300));
-                        if (Object.keys(assistantAudio).length - 1 === idx) {
-                            assistantSpeaking = false;
-                        }
-                        res(e);
-                    };
-                }
-            });
-        } else {
-            return Promise.resolve();
-        }
-    };
-
-    const getOpenAISpeech = async (text) => {
-        const res = await synthesizeOpenAISpeech(
-            localStorage.token,
-            $settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice,
-            text
-        ).catch((error) => {
-            toast.error(error);
-            assistantSpeaking = false;
-            return null;
-        });
-
-        if (res) {
-            const blob = await res.blob();
-            const blobUrl = URL.createObjectURL(blob);
-            const audio = new Audio(blobUrl);
-            assistantAudio = audio;
-        }
-    };
     const transcribeHandler = async (audioBlob) => {
         // Create a blob from the audio chunks
@@ -219,27 +261,34 @@
         }
     };
-    const assistantSpeakingHandler = async (content) => {
-        assistantSpeaking = true;
-
-        if (modelId && ($settings?.showEmojiInCall ?? false)) {
-            console.log('Generating emoji');
-            const res = await generateEmoji(localStorage.token, modelId, content, chatId).catch(
-                (error) => {
-                    console.error(error);
-                    return null;
-                }
-            );
-
-            if (res) {
-                console.log(res);
-                if (/\p{Extended_Pictographic}/u.test(res)) {
-                    emoji = res.match(/\p{Extended_Pictographic}/gu)[0];
-                }
-            }
-        }
-
-        if (($config.audio.tts.engine ?? '') == '') {
+    const stopAllAudio = async () => {
+        interrupted = true;
+
+        if (chatStreaming) {
+            stopResponse();
+        }
+
+        if (currentUtterance) {
+            speechSynthesis.cancel();
+            currentUtterance = null;
+        }
+
+        await tick();
+        audioQueue = [];
+        await tick();
+
+        const audioElement = document.getElementById('audioElement');
+        if (audioElement) {
+            audioElement.pause();
+            audioElement.currentTime = 0;
+        }
+        assistantSpeaking = false;
+    };
+
+    const speakSpeechSynthesisHandler = (content) => {
+        if ($showCallOverlay) {
+            return new Promise((resolve) => {
                 let voices = [];
                 const getVoicesLoop = setInterval(async () => {
                     voices = await speechSynthesis.getVoices();
@@ -260,43 +309,69 @@
                     }
                     speechSynthesis.speak(currentUtterance);

-                    currentUtterance.onend = async () => {
-                        assistantSpeaking = false;
-                    };
+                    currentUtterance.onend = async (e) => {
+                        await new Promise((r) => setTimeout(r, 100));
+                        resolve(e);
+                    };
                 }
             }, 100);
-        } else if ($config.audio.tts.engine === 'openai') {
-            console.log('openai');
-
-            const sentences = extractSentences(content).reduce((mergedTexts, currentText) => {
-                const lastIndex = mergedTexts.length - 1;
-                if (lastIndex >= 0) {
-                    const previousText = mergedTexts[lastIndex];
-                    const wordCount = previousText.split(/\s+/).length;
-                    if (wordCount < 2) {
-                        mergedTexts[lastIndex] = previousText + ' ' + currentText;
-                    } else {
-                        mergedTexts.push(currentText);
-                    }
-                } else {
-                    mergedTexts.push(currentText);
-                }
-                return mergedTexts;
-            }, []);
-
-            console.log(sentences);
-
-            let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately
-
-            for (const [idx, sentence] of sentences.entries()) {
-                const res = await synthesizeOpenAISpeech(
-                    localStorage.token,
-                    $settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice,
-                    sentence
-                ).catch((error) => {
-                    toast.error(error);
-                    assistantSpeaking = false;
-                    return null;
-                });
+            });
+        } else {
+            return Promise.resolve();
+        }
+    };
+
+    const playAudio = (audio) => {
+        if ($showCallOverlay) {
+            return new Promise((resolve) => {
+                const audioElement = document.getElementById('audioElement');
+
+                if (audioElement) {
+                    audioElement.src = audio.src;
+                    audioElement.muted = true;
+
+                    audioElement
+                        .play()
+                        .then(() => {
+                            audioElement.muted = false;
+                        })
+                        .catch((error) => {
+                            console.error(error);
+                        });
+
+                    audioElement.onended = async (e) => {
+                        await new Promise((r) => setTimeout(r, 100));
+                        resolve(e);
+                    };
+                }
+            });
+        } else {
+            return Promise.resolve();
+        }
+    };
+
+    const playAudioHandler = async () => {
+        console.log('playAudioHandler', audioQueue, assistantSpeaking, audioQueue.length > 0);
+        if (!assistantSpeaking && !interrupted && audioQueue.length > 0) {
+            assistantSpeaking = true;
+            const audioToPlay = audioQueue.shift(); // Shift the audio out from queue before playing.
+            audioQueue = audioQueue;
+            await playAudio(audioToPlay);
+            assistantSpeaking = false;
+        }
+    };
+
+    const setContentAudio = async (content, idx) => {
+        if (assistantSentenceAudios[idx] === undefined) {
+            console.log('%c%s', 'color: red; font-size: 20px;', content);
+            assistantSentenceAudios[idx] = null;
+            const res = await synthesizeOpenAISpeech(
+                localStorage.token,
+                $settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice,
+                content
+            ).catch((error) => {
+                toast.error(error);
+                assistantSpeaking = false;
+                return null;
+            });
@@ -305,21 +380,27 @@
             const blob = await res.blob();
             const blobUrl = URL.createObjectURL(blob);
             const audio = new Audio(blobUrl);
-            assistantAudio[idx] = audio;
-            lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
-
-            if (idx === sentences.length - 1) {
-                lastPlayedAudioPromise.then(() => {
-                    assistantSpeaking = false;
-                });
-            }
+            assistantSentenceAudios[idx] = audio;
+            audioQueue.push(audio);
+            audioQueue = audioQueue;
         }
     }
 };
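This block is the heart of the refactor. The old `assistantSpeakingHandler` synthesized every sentence of a finished response and chained playback through `lastPlayedAudioPromise`; the new code splits the work into a producer, `setContentAudio`, which synthesizes one sentence as soon as it completes during streaming and pushes the resulting `Audio` onto `audioQueue`, and a consumer, `playAudioHandler`, which drains the queue one clip at a time and is short-circuited by `interrupted` when the user barges in. A stripped-down sketch of that producer/consumer shape, with hypothetical names and a plain callback in place of Svelte reactivity:

```ts
// Minimal model of the audio queue introduced above (illustrative only).
type Clip = { play: () => Promise<void> };

const audioQueue: Clip[] = [];
let speaking = false;
let interrupted = false;

// Producer: called once per completed sentence, in arrival order.
const enqueue = (clip: Clip) => {
    audioQueue.push(clip);
    void drain();
};

// Consumer: plays at most one clip at a time, then re-checks the queue.
const drain = async () => {
    if (speaking || interrupted || audioQueue.length === 0) return;
    speaking = true;
    const clip = audioQueue.shift()!;
    await clip.play();
    speaking = false;
    void drain(); // in the component, a `$:` block re-triggers this instead
};
```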
     const stopRecordingCallback = async (_continue = true) => {
+        console.log('%c%s', 'color: red; font-size: 20px;', '🚨 stopRecordingCallback 🚨');
         if ($showCallOverlay) {
+            // deep copy the audioChunks array
+            const _audioChunks = audioChunks.slice(0);
+
+            audioChunks = [];
+            mediaRecorder = false;
+
+            if (_continue) {
+                startRecording();
+            }
+
             if (confirmed) {
                 loading = true;
                 emoji = null;
@@ -335,18 +416,12 @@
                 ];
             }

-            const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
+            const audioBlob = new Blob(_audioChunks, { type: 'audio/wav' });
             await transcribeHandler(audioBlob);

             confirmed = false;
             loading = false;
-
-            audioChunks = [];
-            mediaRecorder = false;
-
-            if (_continue) {
-                startRecording();
-            }
         } else {
             audioChunks = [];
             mediaRecorder = false;
@@ -368,113 +443,11 @@
         };

         mediaRecorder.onstop = async () => {
             console.log('Recording stopped');

             await stopRecordingCallback();
         };

         mediaRecorder.start();
     };
-    let videoInputDevices = [];
-    let selectedVideoInputDeviceId = null;
-
-    const getVideoInputDevices = async () => {
-        const devices = await navigator.mediaDevices.enumerateDevices();
-        videoInputDevices = devices.filter((device) => device.kind === 'videoinput');
-
-        if (!!navigator.mediaDevices.getDisplayMedia) {
-            videoInputDevices = [
-                ...videoInputDevices,
-                {
-                    deviceId: 'screen',
-                    label: 'Screen Share'
-                }
-            ];
-        }
-
-        console.log(videoInputDevices);
-
-        if (selectedVideoInputDeviceId === null && videoInputDevices.length > 0) {
-            selectedVideoInputDeviceId = videoInputDevices[0].deviceId;
-        }
-    };
-
-    const startCamera = async () => {
-        await getVideoInputDevices();
-
-        if (cameraStream === null) {
-            camera = true;
-            await tick();
-            try {
-                await startVideoStream();
-            } catch (err) {
-                console.error('Error accessing webcam: ', err);
-            }
-        }
-    };
-
-    const startVideoStream = async () => {
-        const video = document.getElementById('camera-feed');
-        if (video) {
-            if (selectedVideoInputDeviceId === 'screen') {
-                cameraStream = await navigator.mediaDevices.getDisplayMedia({
-                    video: {
-                        cursor: 'always'
-                    },
-                    audio: false
-                });
-            } else {
-                cameraStream = await navigator.mediaDevices.getUserMedia({
-                    video: {
-                        deviceId: selectedVideoInputDeviceId ? { exact: selectedVideoInputDeviceId } : undefined
-                    }
-                });
-            }
-
-            if (cameraStream) {
-                await getVideoInputDevices();
-                video.srcObject = cameraStream;
-                await video.play();
-            }
-        }
-    };
-
-    const stopVideoStream = async () => {
-        if (cameraStream) {
-            const tracks = cameraStream.getTracks();
-            tracks.forEach((track) => track.stop());
-        }
-
-        cameraStream = null;
-    };
-
-    const takeScreenshot = () => {
-        const video = document.getElementById('camera-feed');
-        const canvas = document.getElementById('camera-canvas');
-
-        if (!canvas) {
-            return;
-        }
-
-        const context = canvas.getContext('2d');
-
-        // Make the canvas match the video dimensions
-        canvas.width = video.videoWidth;
-        canvas.height = video.videoHeight;
-
-        // Draw the image from the video onto the canvas
-        context.drawImage(video, 0, 0, video.videoWidth, video.videoHeight);
-
-        // Convert the canvas to a data base64 URL and console log it
-        const dataURL = canvas.toDataURL('image/png');
-        console.log(dataURL);
-
-        return dataURL;
-    };
-
-    const stopCamera = async () => {
-        await stopVideoStream();
-        camera = false;
-    };
     $: if ($showCallOverlay) {
         startRecording();
     } else {
@@ -483,30 +456,73 @@
         stopRecordingCallback(false);
     }
+    $: {
+        if (audioQueue.length > 0 && !assistantSpeaking) {
+            playAudioHandler();
+        }
+    }
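This reactive block is what wakes the playback consumer. One Svelte idiom worth flagging for reviewers: reactivity is assignment-driven, so plain `push()`/`shift()` calls are invisible to `$:` blocks, which is why the new code follows each mutation with the self-assignment `audioQueue = audioQueue`. A minimal illustration (generic Svelte component script, not code from this commit):

```ts
// Illustrative Svelte snippet: `$:` re-runs only when a variable it reads is
// reassigned somewhere, not when it is mutated in place.
let queue: number[] = [];

$: if (queue.length > 0) {
    console.log('queue changed:', queue);
}

const add = (n: number) => {
    queue.push(n); // mutation alone: the `$:` block does NOT re-run
    queue = queue; // self-assignment marks `queue` dirty, so the block re-runs
};
```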
     onMount(() => {
         console.log(eventTarget);

         eventTarget.addEventListener('chat:start', async (e) => {
-            console.log('Chat start event:', e.detail);
-            message = '';
+            console.log('Chat start event:', e);
+            interrupted = false;
+
+            assistantMessage = '';
+            assistantSentenceIdx = -1;
+            assistantSentenceAudios = {}; // Reset audio tracking
+            audioQueue = []; // Clear the audio queue
+
+            chatStreaming = true;
         });

         eventTarget.addEventListener('chat', async (e) => {
             const { content } = e.detail;
-            message += content;
-            console.log('Chat event:', message);
+            assistantMessage += content;
+            await tick();
+
+            if (!interrupted) {
+                if ($config.audio.tts.engine !== '') {
+                    assistantSentenceIdx = assistantSentences.length - 2;
+                    if (assistantSentenceIdx >= 0 && !assistantSentenceAudios[assistantSentenceIdx]) {
+                        await tick();
+                        setContentAudio(assistantSentences[assistantSentenceIdx], assistantSentenceIdx);
+                    }
+                }
+            }
+
+            chatStreaming = true;
         });

         eventTarget.addEventListener('chat:finish', async (e) => {
-            console.log('Chat finish event:', e.detail);
-            message = '';
+            chatStreaming = false;
+            loading = false;
+
+            console.log('Chat finish event:', e);
+            await tick();
+
+            if (!interrupted) {
+                if ($config.audio.tts.engine !== '') {
+                    for (const [idx, sentence] of assistantSentences.entries()) {
+                        if (!assistantSentenceAudios[idx]) {
+                            await tick();
+                            setContentAudio(sentence, idx);
+                        }
+                    }
+                } else {
+                    emoji = generateEmoji(localStorage.token, modelId, assistantMessage);
+                    speakSpeechSynthesisHandler(assistantMessage);
+                }
+            }
         });
     });
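The overlay is driven entirely through the `eventTarget` prop via three custom events. A sketch of the producing side, i.e. what the chat page is assumed to dispatch while a response streams; the event names come from the listeners above, and the `{ content }` payload shape is inferred from `e.detail`:

```ts
// How a caller is assumed to drive the overlay, based on the listeners above.
const eventTarget = new EventTarget();

// 1. Response begins: the overlay resets its sentence/audio bookkeeping.
eventTarget.dispatchEvent(new CustomEvent('chat:start'));

// 2. One event per streamed chunk: the overlay accumulates assistantMessage
//    and synthesizes each sentence as soon as it is complete.
for (const content of ['Hello', ' there', '.']) {
    eventTarget.dispatchEvent(new CustomEvent('chat', { detail: { content } }));
}

// 3. Stream finished: the overlay flushes any sentences not yet synthesized.
eventTarget.dispatchEvent(new CustomEvent('chat:finish'));
```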
 </script>

+<audio id="audioElement" src="" style="display: none;" />
 {#if $showCallOverlay}
-    <audio id="audioElement" src="" style="display: none;" />
     <div class=" absolute w-full h-screen max-h-[100dvh] flex z-[999] overflow-hidden">
         <div
             class="absolute w-full h-screen max-h-[100dvh] bg-white text-gray-700 dark:bg-black dark:text-gray-300 flex justify-center"
...