From 05f756963cdccfcf9df3110896ce33254c8c6d71 Mon Sep 17 00:00:00 2001 From: Srayash Date: Sat, 26 Oct 2024 03:29:54 +0530 Subject: [PATCH 1/8] Feature: Added Text-To-Speech Functionality --- frontend/src/assets/speaker.svg | 4 ++ frontend/src/assets/stopspeech.svg | 5 ++ .../src/components/TextToSpeechButton.tsx | 64 +++++++++++++++++++ .../src/conversation/ConversationBubble.tsx | 10 ++- 4 files changed, 82 insertions(+), 1 deletion(-) create mode 100644 frontend/src/assets/speaker.svg create mode 100644 frontend/src/assets/stopspeech.svg create mode 100644 frontend/src/components/TextToSpeechButton.tsx diff --git a/frontend/src/assets/speaker.svg b/frontend/src/assets/speaker.svg new file mode 100644 index 00000000..ea947330 --- /dev/null +++ b/frontend/src/assets/speaker.svg @@ -0,0 +1,4 @@ + + + + diff --git a/frontend/src/assets/stopspeech.svg b/frontend/src/assets/stopspeech.svg new file mode 100644 index 00000000..f77a235b --- /dev/null +++ b/frontend/src/assets/stopspeech.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/frontend/src/components/TextToSpeechButton.tsx b/frontend/src/components/TextToSpeechButton.tsx new file mode 100644 index 00000000..2ab7d1c2 --- /dev/null +++ b/frontend/src/components/TextToSpeechButton.tsx @@ -0,0 +1,64 @@ +import { useState } from 'react'; +import Speaker from '../assets/speaker.svg?react'; +import Stopspeech from '../assets/stopspeech.svg?react'; + +export default function SpeakButton({ + text, + colorLight, + colorDark, +}: { + text: string; + colorLight?: string; + colorDark?: string; +}) { + const [isSpeaking, setIsSpeaking] = useState(false); + const [isSpeakHovered, setIsSpeakHovered] = useState(false); + + const handleSpeakClick = (text: string) => { + if (isSpeaking) { + window.speechSynthesis.cancel(); + setIsSpeaking(false); + return; + } // Stop ongoing speech if already speaking + + const utterance = new SpeechSynthesisUtterance(text); + setIsSpeaking(true); + + utterance.onend = () => { + setIsSpeaking(false); // Reset when speech ends + }; + + utterance.onerror = () => { + console.error('Speech synthesis failed.'); + setIsSpeaking(false); + }; + + window.speechSynthesis.speak(utterance); + }; + + return ( +
+ {isSpeaking ? ( + handleSpeakClick(text)} + onMouseEnter={() => setIsSpeakHovered(true)} + onMouseLeave={() => setIsSpeakHovered(false)} + /> + ) : ( + handleSpeakClick(text)} + onMouseEnter={() => setIsSpeakHovered(true)} + onMouseLeave={() => setIsSpeakHovered(false)} + /> + )} +
+ ); +} diff --git a/frontend/src/conversation/ConversationBubble.tsx b/frontend/src/conversation/ConversationBubble.tsx index 2ccf1ca3..a9a05168 100644 --- a/frontend/src/conversation/ConversationBubble.tsx +++ b/frontend/src/conversation/ConversationBubble.tsx @@ -7,7 +7,6 @@ import remarkGfm from 'remark-gfm'; import remarkMath from 'remark-math'; import rehypeKatex from 'rehype-katex'; import 'katex/dist/katex.min.css'; - import DocsGPT3 from '../assets/cute_docsgpt3.svg'; import Dislike from '../assets/dislike.svg?react'; import Document from '../assets/document.svg'; @@ -23,6 +22,7 @@ import { } from '../preferences/preferenceSlice'; import classes from './ConversationBubble.module.css'; import { FEEDBACK, MESSAGE_TYPE } from './conversationModels'; +import SpeakButton from '../components/TextToSpeechButton'; const DisableSourceFE = import.meta.env.VITE_DISABLE_SOURCE_FE || false; @@ -336,6 +336,14 @@ const ConversationBubble = forwardRef< +
+
+ {/* Add SpeakButton here */} +
+
{type === 'ERROR' && (
{retryBtn}
From 91690ff99affef6260628ee7f7b1d97ddea18abf Mon Sep 17 00:00:00 2001 From: Srayash <146334722+Srayash@users.noreply.github.com> Date: Sun, 27 Oct 2024 23:02:52 +0530 Subject: [PATCH 2/8] Resize speaker icon. --- frontend/src/assets/speaker.svg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/assets/speaker.svg b/frontend/src/assets/speaker.svg index ea947330..6c379177 100644 --- a/frontend/src/assets/speaker.svg +++ b/frontend/src/assets/speaker.svg @@ -1,4 +1,4 @@ - + From fcb6bec4746314244d9abca42e97cf7949ec2a3e Mon Sep 17 00:00:00 2001 From: Srayash <146334722+Srayash@users.noreply.github.com> Date: Mon, 28 Oct 2024 02:52:13 +0530 Subject: [PATCH 3/8] Update TextToSpeechButton.tsx --- .../src/components/TextToSpeechButton.tsx | 33 ++++++++++--------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/frontend/src/components/TextToSpeechButton.tsx b/frontend/src/components/TextToSpeechButton.tsx index 2ab7d1c2..cc062c87 100644 --- a/frontend/src/components/TextToSpeechButton.tsx +++ b/frontend/src/components/TextToSpeechButton.tsx @@ -1,6 +1,7 @@ import { useState } from 'react'; import Speaker from '../assets/speaker.svg?react'; import Stopspeech from '../assets/stopspeech.svg?react'; +import EasySpeech from 'easy-speech'; export default function SpeakButton({ text, @@ -14,26 +15,28 @@ export default function SpeakButton({ const [isSpeaking, setIsSpeaking] = useState(false); const [isSpeakHovered, setIsSpeakHovered] = useState(false); - const handleSpeakClick = (text: string) => { + const handleSpeakClick = async (text: string) => { if (isSpeaking) { - window.speechSynthesis.cancel(); + EasySpeech.cancel(); setIsSpeaking(false); return; } // Stop ongoing speech if already speaking - const utterance = new SpeechSynthesisUtterance(text); - setIsSpeaking(true); - - utterance.onend = () => { - setIsSpeaking(false); // Reset when speech ends - }; - - utterance.onerror = () => { - console.error('Speech synthesis failed.'); - setIsSpeaking(false); - }; - - window.speechSynthesis.speak(utterance); + try { + await EasySpeech.init(); // Initialize EasySpeech + setIsSpeaking(true); + + EasySpeech.speak({ + text, + onend: () => setIsSpeaking(false), // Reset when speech ends + onerror: () => { + console.error('Speech synthesis failed.'); + setIsSpeaking(false); + }, + }); + } catch (error) { + console.error('Failed to initialize speech synthesis', error); + } }; return ( From 6978e7439f9039b8c8cfe1f201b6d40e131db089 Mon Sep 17 00:00:00 2001 From: Srayash <146334722+Srayash@users.noreply.github.com> Date: Mon, 28 Oct 2024 02:53:07 +0530 Subject: [PATCH 4/8] Update package-lock.json --- frontend/package-lock.json | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 4087e4f5..37bf0c07 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -10,6 +10,7 @@ "dependencies": { "@reduxjs/toolkit": "^2.2.7", "chart.js": "^4.4.4", + "easy-speech": "^2.4.0", "i18next": "^23.15.1", "i18next-browser-languagedetector": "^8.0.0", "prop-types": "^15.8.1", @@ -3075,6 +3076,24 @@ "integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==", "dev": true }, + "node_modules/easy-speech": { + "version": "2.4.0", + "resolved": "https://registry.npmjs.org/easy-speech/-/easy-speech-2.4.0.tgz", + "integrity": "sha512-wpMv29DEoeP/eyXr4aXpDqd9DvlXl7aQs7BgfKbjGVxqkmQPgNmpbF5YULaTH5bc/5qrteg5MDfCD2Zd0qr4rQ==", + "funding": [ + { + "type": "GitHub", + "url": "https://github.com/sponsors/jankapunkt" + }, + { + "type": "PayPal", + "url": "https://paypal.me/kuesterjan" + } + ], + "engines": { + "node": ">= 14.x" + } + }, "node_modules/electron-to-chromium": { "version": "1.5.11", "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.11.tgz", From 0b1a3029959384df00fff0b5e0bce78c0fc52b94 Mon Sep 17 00:00:00 2001 From: Srayash <146334722+Srayash@users.noreply.github.com> Date: Mon, 28 Oct 2024 02:53:51 +0530 Subject: [PATCH 5/8] Update package.json --- frontend/package.json | 1 + 1 file changed, 1 insertion(+) diff --git a/frontend/package.json b/frontend/package.json index 83d531d6..9148831f 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -21,6 +21,7 @@ "dependencies": { "@reduxjs/toolkit": "^2.2.7", "chart.js": "^4.4.4", + "easy-speech": "^2.4.0", "i18next": "^23.15.1", "i18next-browser-languagedetector": "^8.0.0", "prop-types": "^15.8.1", From b223cf05d93be450b15c0a6dfc6c233d47717a13 Mon Sep 17 00:00:00 2001 From: Srayash Date: Tue, 29 Oct 2024 23:36:09 +0530 Subject: [PATCH 6/8] use /api/tts endpoint for TTS feature --- .../src/components/TextToSpeechButton.tsx | 52 +++++++++++++------ 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/frontend/src/components/TextToSpeechButton.tsx b/frontend/src/components/TextToSpeechButton.tsx index cc062c87..b5402469 100644 --- a/frontend/src/components/TextToSpeechButton.tsx +++ b/frontend/src/components/TextToSpeechButton.tsx @@ -1,7 +1,7 @@ -import { useState } from 'react'; +import { useState, useRef } from 'react'; import Speaker from '../assets/speaker.svg?react'; import Stopspeech from '../assets/stopspeech.svg?react'; -import EasySpeech from 'easy-speech'; +const apiHost = import.meta.env.VITE_API_HOST || 'https://docsapi.arc53.com'; export default function SpeakButton({ text, @@ -14,28 +14,46 @@ export default function SpeakButton({ }) { const [isSpeaking, setIsSpeaking] = useState(false); const [isSpeakHovered, setIsSpeakHovered] = useState(false); + const audioRef = useRef(null); // Reference to the audio object - const handleSpeakClick = async (text: string) => { + const handleSpeakClick = async () => { if (isSpeaking) { - EasySpeech.cancel(); + // Stop audio if currently playing and reset the state + audioRef.current?.pause(); + audioRef.current = null; setIsSpeaking(false); return; - } // Stop ongoing speech if already speaking + } try { - await EasySpeech.init(); // Initialize EasySpeech setIsSpeaking(true); - - EasySpeech.speak({ - text, - onend: () => setIsSpeaking(false), // Reset when speech ends - onerror: () => { - console.error('Speech synthesis failed.'); - setIsSpeaking(false); - }, + + // Make a POST request to the /api/tts endpoint + const response = await fetch(apiHost + '/api/tts', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ text }), }); + + const data = await response.json(); + + if (data.success && data.audio_base64) { + const audio = new Audio(`data:audio/mp3;base64,${data.audio_base64}`); + audioRef.current = audio; // Store the audio object in ref for later control + audio.play(); + + // Reset state when audio ends + audio.onended = () => { + setIsSpeaking(false); + audioRef.current = null; + }; + } else { + console.error('Failed to retrieve audio.'); + setIsSpeaking(false); + } } catch (error) { - console.error('Failed to initialize speech synthesis', error); + console.error('Error fetching audio from TTS endpoint', error); + setIsSpeaking(false); } }; @@ -50,14 +68,14 @@ export default function SpeakButton({ {isSpeaking ? ( handleSpeakClick(text)} + onClick={handleSpeakClick} onMouseEnter={() => setIsSpeakHovered(true)} onMouseLeave={() => setIsSpeakHovered(false)} /> ) : ( handleSpeakClick(text)} + onClick={handleSpeakClick} onMouseEnter={() => setIsSpeakHovered(true)} onMouseLeave={() => setIsSpeakHovered(false)} /> From 605f168c7ef834f733bdeb461e9ad56c28404d4f Mon Sep 17 00:00:00 2001 From: Srayash Date: Tue, 29 Oct 2024 23:51:25 +0530 Subject: [PATCH 7/8] remove unused modules (easy-speech) --- frontend/package-lock.json | 1 - frontend/package.json | 1 - 2 files changed, 2 deletions(-) diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 37bf0c07..9973bb9e 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -10,7 +10,6 @@ "dependencies": { "@reduxjs/toolkit": "^2.2.7", "chart.js": "^4.4.4", - "easy-speech": "^2.4.0", "i18next": "^23.15.1", "i18next-browser-languagedetector": "^8.0.0", "prop-types": "^15.8.1", diff --git a/frontend/package.json b/frontend/package.json index 9148831f..83d531d6 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -21,7 +21,6 @@ "dependencies": { "@reduxjs/toolkit": "^2.2.7", "chart.js": "^4.4.4", - "easy-speech": "^2.4.0", "i18next": "^23.15.1", "i18next-browser-languagedetector": "^8.0.0", "prop-types": "^15.8.1", From 5c99615edf373bef3b50c1c188064ca7979864fc Mon Sep 17 00:00:00 2001 From: Srayash Date: Wed, 30 Oct 2024 00:03:08 +0530 Subject: [PATCH 8/8] UI changes: add loading animation while audio response is being fetched --- frontend/src/assets/Loading.svg | 3 ++ .../src/components/TextToSpeechButton.tsx | 37 ++++++++++++------- 2 files changed, 26 insertions(+), 14 deletions(-) create mode 100644 frontend/src/assets/Loading.svg diff --git a/frontend/src/assets/Loading.svg b/frontend/src/assets/Loading.svg new file mode 100644 index 00000000..84a604f9 --- /dev/null +++ b/frontend/src/assets/Loading.svg @@ -0,0 +1,3 @@ + + + diff --git a/frontend/src/components/TextToSpeechButton.tsx b/frontend/src/components/TextToSpeechButton.tsx index b5402469..2cb9e8f8 100644 --- a/frontend/src/components/TextToSpeechButton.tsx +++ b/frontend/src/components/TextToSpeechButton.tsx @@ -1,6 +1,7 @@ import { useState, useRef } from 'react'; import Speaker from '../assets/speaker.svg?react'; import Stopspeech from '../assets/stopspeech.svg?react'; +import LoadingIcon from '../assets/Loading.svg?react'; // Add a loading icon SVG here const apiHost = import.meta.env.VITE_API_HOST || 'https://docsapi.arc53.com'; export default function SpeakButton({ @@ -13,12 +14,13 @@ export default function SpeakButton({ colorDark?: string; }) { const [isSpeaking, setIsSpeaking] = useState(false); + const [isLoading, setIsLoading] = useState(false); const [isSpeakHovered, setIsSpeakHovered] = useState(false); - const audioRef = useRef(null); // Reference to the audio object + const audioRef = useRef(null); const handleSpeakClick = async () => { if (isSpeaking) { - // Stop audio if currently playing and reset the state + // Stop audio if it's currently playing audioRef.current?.pause(); audioRef.current = null; setIsSpeaking(false); @@ -26,9 +28,9 @@ export default function SpeakButton({ } try { - setIsSpeaking(true); + // Set loading state and initiate TTS request + setIsLoading(true); - // Make a POST request to the /api/tts endpoint const response = await fetch(apiHost + '/api/tts', { method: 'POST', headers: { 'Content-Type': 'application/json' }, @@ -38,22 +40,27 @@ export default function SpeakButton({ const data = await response.json(); if (data.success && data.audio_base64) { + // Create and play the audio const audio = new Audio(`data:audio/mp3;base64,${data.audio_base64}`); - audioRef.current = audio; // Store the audio object in ref for later control - audio.play(); + audioRef.current = audio; - // Reset state when audio ends - audio.onended = () => { - setIsSpeaking(false); - audioRef.current = null; - }; + audio.play().then(() => { + setIsSpeaking(true); + setIsLoading(false); + + // Reset when audio ends + audio.onended = () => { + setIsSpeaking(false); + audioRef.current = null; + }; + }); } else { console.error('Failed to retrieve audio.'); - setIsSpeaking(false); + setIsLoading(false); } } catch (error) { console.error('Error fetching audio from TTS endpoint', error); - setIsSpeaking(false); + setIsLoading(false); } }; @@ -65,7 +72,9 @@ export default function SpeakButton({ : `bg-[${colorLight ? colorLight : '#FFFFFF'}] dark:bg-[${colorDark ? colorDark : 'transparent'}]` }`} > - {isSpeaking ? ( + {isLoading ? ( + + ) : isSpeaking ? (