From 36b7d94bdd0ab8b92d780d581889c4dc75607cd7 Mon Sep 17 00:00:00 2001 From: navyseal4000 Date: Wed, 13 Nov 2024 21:36:50 -0600 Subject: [PATCH 1/5] Added speech to text capability --- app/components/chat/BaseChat.tsx | 79 ++++++++++++++++++++++++++++++++ app/types/global.d.ts | 2 + 2 files changed, 81 insertions(+) diff --git a/app/components/chat/BaseChat.tsx b/app/components/chat/BaseChat.tsx index 902cb23..fee28c0 100644 --- a/app/components/chat/BaseChat.tsx +++ b/app/components/chat/BaseChat.tsx @@ -87,6 +87,35 @@ interface BaseChatProps { enhancePrompt?: () => void; } +const SpeechRecognitionButton = ({ + isListening, + onStart, + onStop, + disabled +}: { + isListening: boolean; + onStart: () => void; + onStop: () => void; + disabled: boolean; +}) => { + return ( + + {isListening ? ( +
+ ) : ( +
+ )} + + ); +}; + export const BaseChat = React.forwardRef( ( { @@ -114,6 +143,8 @@ export const BaseChat = React.forwardRef( const TEXTAREA_MAX_HEIGHT = chatStarted ? 400 : 200; const [apiKeys, setApiKeys] = useState>({}); const [modelList, setModelList] = useState(MODEL_LIST); + const [isListening, setIsListening] = useState(false); + const [recognition, setRecognition] = useState(null); useEffect(() => { // Load API keys from cookies on component mount @@ -134,8 +165,49 @@ export const BaseChat = React.forwardRef( initializeModelList().then((modelList) => { setModelList(modelList); }); + if (typeof window !== 'undefined' && ('SpeechRecognition' in window || 'webkitSpeechRecognition' in window)) { + const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition; + const recognition = new SpeechRecognition(); + recognition.continuous = true; + recognition.interimResults = true; + + recognition.onresult = (event) => { + const transcript = Array.from(event.results) + .map(result => result[0]) + .map(result => result.transcript) + .join(''); + + if (handleInputChange) { + const syntheticEvent = { + target: { value: transcript }, + } as React.ChangeEvent; + handleInputChange(syntheticEvent); + } + }; + + recognition.onerror = (event) => { + console.error('Speech recognition error:', event.error); + setIsListening(false); + }; + + setRecognition(recognition); + } }, []); + const startListening = () => { + if (recognition) { + recognition.start(); + setIsListening(true); + } + }; + + const stopListening = () => { + if (recognition) { + recognition.stop(); + setIsListening(false); + } + }; + const updateApiKey = (provider: string, key: string) => { try { const updatedApiKeys = { ...apiKeys, [provider]: key }; @@ -284,6 +356,13 @@ export const BaseChat = React.forwardRef( )} + +
{input.length > 3 ? (
diff --git a/app/types/global.d.ts b/app/types/global.d.ts index a1f6789..193c65d 100644 --- a/app/types/global.d.ts +++ b/app/types/global.d.ts @@ -1,3 +1,5 @@ interface Window { showDirectoryPicker(): Promise; + webkitSpeechRecognition: typeof SpeechRecognition; + SpeechRecognition: typeof SpeechRecognition; } From a896f3f312bcecf9b9df588b63a4a3b78efbe06d Mon Sep 17 00:00:00 2001 From: navyseal4000 Date: Thu, 21 Nov 2024 07:55:53 -0600 Subject: [PATCH 2/5] Clear speech to text, listening upon submission --- app/components/chat/BaseChat.tsx | 38 ++++++++++++++++++++++++++++---- app/utils/constants.ts | 2 +- 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/app/components/chat/BaseChat.tsx b/app/components/chat/BaseChat.tsx index fee28c0..dde0cca 100644 --- a/app/components/chat/BaseChat.tsx +++ b/app/components/chat/BaseChat.tsx @@ -145,6 +145,7 @@ export const BaseChat = React.forwardRef( const [modelList, setModelList] = useState(MODEL_LIST); const [isListening, setIsListening] = useState(false); const [recognition, setRecognition] = useState(null); + const [transcript, setTranscript] = useState(''); useEffect(() => { // Load API keys from cookies on component mount @@ -177,6 +178,9 @@ export const BaseChat = React.forwardRef( .map(result => result.transcript) .join(''); + setTranscript(transcript); + + if (handleInputChange) { const syntheticEvent = { target: { value: transcript }, @@ -208,6 +212,25 @@ export const BaseChat = React.forwardRef( } }; + const handleSendMessage = (event: React.UIEvent, messageInput?: string) => { + if (sendMessage) { + sendMessage(event, messageInput); + if (recognition) { + recognition.abort(); // Stop current recognition + setTranscript(''); // Clear transcript + setIsListening(false); + + // Clear the input by triggering handleInputChange with empty value + if (handleInputChange) { + const syntheticEvent = { + target: { value: '' }, + } as React.ChangeEvent; + handleInputChange(syntheticEvent); + } + } + } + }; + const updateApiKey = (provider: string, key: string) => { try { const updatedApiKeys = { ...apiKeys, [provider]: key }; @@ -301,8 +324,11 @@ export const BaseChat = React.forwardRef( } event.preventDefault(); - - sendMessage?.(event); + if (isStreaming) { + handleStop?.(); + return; + } + handleSendMessage?.(event); } }} value={input} @@ -327,7 +353,7 @@ export const BaseChat = React.forwardRef( return; } - sendMessage?.(event); + handleSendMessage?.(event); }} /> )} @@ -384,7 +410,11 @@ export const BaseChat = React.forwardRef(