stackblitz-labs · zaberazaber · Dec 2, 2024 · Dec 2, 2024
diff --git a/app/components/chat/BaseChat.tsx b/app/components/chat/BaseChat.tsx
@@ -21,6 +21,8 @@ import type { ProviderInfo } from '~/utils/types';
 import { ExportChatButton } from '~/components/chat/chatExportAndImport/ExportChatButton';
 import { ImportButtons } from '~/components/chat/chatExportAndImport/ImportButtons';
 import { ExamplePrompts } from '~/components/chat/ExamplePrompts';
+import { useVoiceInput } from '~/lib/hooks/useVoiceInput';
+
 
 // @ts-ignore TODO: Introduce proper types
 // eslint-disable-next-line @typescript-eslint/no-unused-vars
@@ -117,6 +119,8 @@ export const BaseChat = React.forwardRef<HTMLDivElement, BaseChatProps>(
     const [apiKeys, setApiKeys] = useState<Record<string, string>>({});
     const [modelList, setModelList] = useState(MODEL_LIST);
     const [isModelSettingsCollapsed, setIsModelSettingsCollapsed] = useState(false);
+    const [shouldUpdateInput, setShouldUpdateInput] = useState(false);
+    const { isListening, toggleListening, transcription } = useVoiceInput(input, handleInputChange);
 
     useEffect(() => {
       // Load API keys from cookies on component mount
@@ -142,6 +146,16 @@ export const BaseChat = React.forwardRef<HTMLDivElement, BaseChatProps>(
       });
     }, []);
 
+
+    useEffect(() => {
+      // When `shouldUpdateInput` is set, append `transcription` to `input` through `handleInputChange`, then clear the flag.
+      // NOTE(review): nothing in this diff sets `shouldUpdateInput` to true, and `toggleListening` (useVoiceInput) already appends the transcription on stop — confirm this effect is still needed.
+      if (shouldUpdateInput && transcription && transcription !== input) {
+        handleInputChange?.({ target: { value: input + transcription } } as React.ChangeEvent<HTMLTextAreaElement>);
+        setShouldUpdateInput(false); // Reset the flag after updating
+      }
+    }, [transcription, shouldUpdateInput]);
+
     const updateApiKey = (provider: string, key: string) => {
       try {
         const updatedApiKeys = { ...apiKeys, [provider]: key };
@@ -326,6 +340,18 @@ export const BaseChat = React.forwardRef<HTMLDivElement, BaseChatProps>(
                   </ClientOnly>
                   <div className="flex justify-between items-center text-sm p-4 pt-2">
                     <div className="flex gap-1 items-center">
+
+
+<IconButton
+              title={isListening ? 'Stop listening' : 'Start listening'}
+              onClick={toggleListening}
+            >
+              {isListening ? (
+                <svg className="i-svg-spinners:90-ring-with-bg text-xl animate-spin"></svg>
+              ) : (
+                <svg className="i-bolt:mic text-xl"></svg>
+              )}
+            </IconButton>
                       <IconButton
                         title="Enhance prompt"
                         disabled={input.length === 0 || enhancingPrompt}

diff --git a/app/lib/hooks/useTextToSpeech.ts b/app/lib/hooks/useTextToSpeech.ts
@@ -0,0 +1,94 @@
+import { useState } from 'react';
+
+/**
+ * React hook around the browser `SpeechRecognition` API. Despite the name, it converts
+ * speech to text.
+ *
+ * @returns An object with:
+ * - `transcription`: the finalized phrases recognized since `startListening` was last
+ *   called, joined with single spaces; reset to `''` whenever recognition stops.
+ * - `startListening`: starts a continuous session, or shows an `alert` when the browser
+ *   exposes neither `SpeechRecognition` nor `webkitSpeechRecognition`.
+ * - `stopListening`: stops the active session, if any, and clears `transcription`.
+ */
+export const useTextToSpeech = () => {
+  const [transcription, setTranscription] = useState('');
+  const [recognizer, setRecognizer] = useState<SpeechRecognition | null>(null);
+
+  const startListening = () => {
+    // A session is already running; starting another would orphan it (its handle in state is overwritten).
+    if (recognizer) {
+      return;
+    }
+
+    // Some browsers expose only the `webkit`-prefixed constructor.
+    const RecognitionCtor = window.SpeechRecognition || (window as any).webkitSpeechRecognition;
+
+    if (!RecognitionCtor) {
+      alert("Speech recognition is not supported in your browser.");
+      return;
+    }
+
+    const recognition: SpeechRecognition = new RecognitionCtor();
+    recognition.continuous = true;
+    recognition.interimResults = true;
+
+    // Finalized phrases of this session, in the order they were recognized.
+    const finalSegments: string[] = [];
+
+    recognition.onstart = () => {
+      console.log("Voice recognition started");
+    };
+
+    recognition.onresult = (event: SpeechRecognitionEvent) => {
+      let gotFinal = false;
+
+      // One event may report several results; `resultIndex` is the first one that changed.
+      for (let i = event.resultIndex; i < event.results.length; i++) {
+        const result = event.results[i];
+        const transcript = result?.[0]?.transcript;
+
+        if (!result || transcript === undefined) {
+          continue;
+        }
+
+        // Test finality on the result being read, not on `results[0]`: in continuous mode
+        // the first phrase stays final, which made every later interim result look final.
+        if (result.isFinal) {
+          finalSegments.push(transcript.trim());
+          gotFinal = true;
+          console.log("Final Transcription:", transcript);
+        } else {
+          console.log("Interim Transcription:", transcript);
+        }
+      }
+
+      if (gotFinal) {
+        setTranscription(finalSegments.filter(Boolean).join(' '));
+      }
+    };
+
+    recognition.onend = () => {
+      console.log("Voice recognition stopped");
+      setRecognizer(null); // Allow a new session to be started
+      setTranscription(''); // Clear transcription when stopping
+    };
+
+    recognition.onerror = (event) => {
+      console.error("Error occurred in speech recognition:", event.error);
+    };
+
+    setRecognizer(recognition);
+    recognition.start();
+  };
+
+  const stopListening = () => {
+    if (recognizer) {
+      recognizer.stop();
+      setRecognizer(null); // Reset recognizer state
+      setTranscription(''); // Clear transcription when stopping
+    }
+  };
+
+  return { transcription, startListening, stopListening };
+};
diff --git a/app/lib/hooks/useVoiceInput.ts b/app/lib/hooks/useVoiceInput.ts
@@ -0,0 +1,53 @@
+import { useState } from 'react';
+import { useTextToSpeech } from '~/lib/hooks/useTextToSpeech';
+
+/** Whether this browser exposes a (possibly `webkit`-prefixed) `SpeechRecognition` constructor. */
+const isSpeechRecognitionSupported = (): boolean =>
+  typeof window !== 'undefined' && ('SpeechRecognition' in window || 'webkitSpeechRecognition' in window);
+
+/**
+ * Microphone toggle for a text input, built on `useTextToSpeech`.
+ *
+ * @param input - Current value of the text input.
+ * @param handleInputChange - The input's change handler; when listening is stopped it is called
+ *   with a stand-in event whose `target.value` is `input`, a separating space if `input` is
+ *   non-empty, and the transcription. Nothing is written when it is `undefined`.
+ * @returns `isListening`, the `toggleListening` click handler, and the current `transcription`.
+ */
+export const useVoiceInput = (
+  input: string,
+  handleInputChange: ((event: React.ChangeEvent<HTMLTextAreaElement>) => void) | undefined
+) => {
+  const [isListening, setIsListening] = useState(false);
+  const { transcription, startListening, stopListening } = useTextToSpeech();
+
+  const toggleListening = () => {
+    if (isListening) {
+      console.log("Stopping listening...");
+      stopListening();
+
+      if (transcription && handleInputChange) {
+        console.log("Transcription:", transcription);
+        // Only `target.value` is populated, so the handler must not read anything else.
+        handleInputChange({
+          target: { value: input + (input ? " " : "") + transcription },
+        } as React.ChangeEvent<HTMLTextAreaElement>);
+      }
+
+      setIsListening(false);
+      return;
+    }
+
+    console.log("Starting listening...");
+    startListening();
+    // On unsupported browsers `startListening` only shows an alert, so stay in the idle state
+    // instead of displaying a "listening" UI that never receives speech.
+    setIsListening(isSpeechRecognitionSupported());
+  };
+
+  return {
+    isListening,
+    toggleListening,
+    transcription,
+  };
+};
diff --git a/app/types/global.d.ts b/app/types/global.d.ts
@@ -1,3 +1,60 @@
 interface Window {
   showDirectoryPicker(): Promise<FileSystemDirectoryHandle>;
 }
+/** Ambient type of a speech recognizer instance; `useTextToSpeech` sets `continuous`/`interimResults`, assigns `onstart`/`onresult`/`onend`/`onerror`, and calls `start()`/`stop()`. */
+interface SpeechRecognition extends EventTarget {
+  grammars: SpeechGrammarList; // NOTE(review): `SpeechGrammarList` is not declared in this diff — confirm it resolves
+  lang: string;
+  continuous: boolean;
+  interimResults: boolean;
+  maxAlternatives: number;
+  serviceURI: string;
+
+  start(): void;
+  stop(): void;
+  abort(): void;
+
+  onaudiostart: ((this: SpeechRecognition, ev: Event) => any) | null;
+  onsoundstart: ((this: SpeechRecognition, ev: Event) => any) | null;
+  onspeechstart: ((this: SpeechRecognition, ev: Event) => any) | null;
+  onspeechend: ((this: SpeechRecognition, ev: Event) => any) | null;
+  onsoundend: ((this: SpeechRecognition, ev: Event) => any) | null;
+  onaudioend: ((this: SpeechRecognition, ev: Event) => any) | null;
+  onresult: ((this: SpeechRecognition, ev: SpeechRecognitionEvent) => any) | null;
+  onnomatch: ((this: SpeechRecognition, ev: SpeechRecognitionEvent) => any) | null;
+  onerror: ((this: SpeechRecognition, ev: SpeechRecognitionErrorEvent) => any) | null; // NOTE(review): `SpeechRecognitionErrorEvent` is not declared in this diff — confirm it resolves
+  onstart: ((this: SpeechRecognition, ev: Event) => any) | null;
+  onend: ((this: SpeechRecognition, ev: Event) => any) | null;
+}
+/** Global constructor producing `SpeechRecognition` instances; `useTextToSpeech` reads it as `window.SpeechRecognition`. */
+declare var SpeechRecognition: {
+  prototype: SpeechRecognition;
+  new (): SpeechRecognition;
+};
+/** Event type received by the `onresult` and `onnomatch` handlers of `SpeechRecognition`. */
+interface SpeechRecognitionEvent extends Event {
+  readonly resultIndex: number; // `useTextToSpeech` uses this as an index into `results`
+  readonly results: SpeechRecognitionResultList;
+}
+/** Array-like collection of `SpeechRecognitionResult`s: numeric index access plus `length`. */
+interface SpeechRecognitionResultList {
+  [index: number]: SpeechRecognitionResult;
+  readonly length: number;
+}
+/** One recognition result: an array-like list of `SpeechRecognitionAlternative`s plus an `isFinal` flag. */
+interface SpeechRecognitionResult {
+  readonly isFinal: boolean; // `useTextToSpeech` stores a transcript only when this flag is true
+  readonly length: number;
+  item(index: number): SpeechRecognitionAlternative;
+  [index: number]: SpeechRecognitionAlternative;
+}
+/** A single candidate for a result: the recognized `transcript` text and a numeric `confidence`. */
+interface SpeechRecognitionAlternative {
+  readonly transcript: string;
+  readonly confidence: number;
+}
+/** `webkit`-prefixed alias of the `SpeechRecognition` constructor; declared with `var` (like `SpeechRecognition`) so it is also typed as a property of `window`/`globalThis`. */
+declare var webkitSpeechRecognition: {
+  prototype: SpeechRecognition;
+  new (): SpeechRecognition;
+};
diff --git a/icons/mic.svg b/icons/mic.svg