From f7fc58dffd9aad072a1ee2533d426444b49472c7 Mon Sep 17 00:00:00 2001 From: thivy Date: Fri, 29 Sep 2023 17:13:26 +1000 Subject: [PATCH 01/13] add speech to text --- src/components/chat/chat-input.tsx | 14 ++- .../chat/chat-speech/speech-button.tsx | 93 ++++++++++++++++++ .../chat/chat-speech/speech-service.ts | 18 ++++ src/features/chat/chat-ui/chat-ui.tsx | 33 +++---- src/package-lock.json | 96 +++++++++++++++++++ src/package.json | 1 + src/type.ts | 4 +- 7 files changed, 235 insertions(+), 24 deletions(-) create mode 100644 src/features/chat/chat-speech/speech-button.tsx create mode 100644 src/features/chat/chat-speech/speech-service.ts diff --git a/src/components/chat/chat-input.tsx b/src/components/chat/chat-input.tsx index 484b7c90f..88282ab35 100644 --- a/src/components/chat/chat-input.tsx +++ b/src/components/chat/chat-input.tsx @@ -1,3 +1,4 @@ +import { SpeechButton } from "@/features/chat/chat-speech/speech-button"; import { Loader, Send } from "lucide-react"; import { FC, FormEvent, useRef, useState } from "react"; import { Button } from "../ui/button"; @@ -5,13 +6,14 @@ import { Textarea } from "../ui/textarea"; interface Props { value: string; + setInput: (value: string) => void; handleSubmit: (e: FormEvent) => void; - handleInputChange: (e: any) => void; isLoading: boolean; } const ChatInput: FC = (props) => { const buttonRef = useRef(null); + const textAreaRef = useRef(null); const [rows, setRows] = useState(1); const maxRows = 6; const [keysPressed, setKeysPressed] = useState(new Set()); @@ -47,7 +49,7 @@ const ChatInput: FC = (props) => { const onChange = (event: React.ChangeEvent) => { setRowsToMax(event.target.value.split("\n").length - 1); - props.handleInputChange(event); + props.setInput(event.target.value); }; const setRowsToMax = (rows: number) => { @@ -63,6 +65,7 @@ const ChatInput: FC = (props) => { >
+ { + textAreaRef.current!.value = text; + props.setInput(text); + }} + /> + ); +}; + +interface SpeechRecognizerProps { + onSpeech: (text: string) => void; +} + +export const useSpeechRecognizer = (props: SpeechRecognizerProps) => { + const recognizerRef = useRef(); + + const startRecognition = async () => { + const token = await GetSpeechToken(); + console.log(token); + const speechConfig = SpeechConfig.fromAuthorizationToken( + token.token, + token.region + ); + + const audioConfig = AudioConfig.fromDefaultMicrophoneInput(); + const autoDetectSourceLanguageConfig = + AutoDetectSourceLanguageConfig.fromLanguages([ + "en-US", + "zh-CN", + "it-IT", + "pt-BR", + ]); + const recognizer = SpeechRecognizer.FromConfig( + speechConfig, + autoDetectSourceLanguageConfig, + audioConfig + ); + + recognizerRef.current = recognizer; + + recognizer.recognizing = (s, e) => { + props.onSpeech(e.result.text); + }; + + recognizer.startContinuousRecognitionAsync(); + }; + + const stopRecognition = () => { + recognizerRef.current?.stopContinuousRecognitionAsync(); + }; + + return { startRecognition, stopRecognition }; +}; diff --git a/src/features/chat/chat-speech/speech-service.ts b/src/features/chat/chat-speech/speech-service.ts new file mode 100644 index 000000000..bf196a58e --- /dev/null +++ b/src/features/chat/chat-speech/speech-service.ts @@ -0,0 +1,18 @@ +"use server"; + +export const GetSpeechToken = async () => { + const response = await fetch( + `https://${process.env.AZURE_SPEECH_REGION}.api.cognitive.microsoft.com/sts/v1.0/issueToken`, + { + method: "POST", + headers: { + "Ocp-Apim-Subscription-Key": process.env.AZURE_SPEECH_KEY!, + }, + } + ); + + return { + token: await response.text(), + region: process.env.AZURE_SPEECH_REGION, + }; +}; diff --git a/src/features/chat/chat-ui/chat-ui.tsx b/src/features/chat/chat-ui/chat-ui.tsx index 3e5ab54ed..d280dd0ee 100644 --- a/src/features/chat/chat-ui/chat-ui.tsx +++ b/src/features/chat/chat-ui/chat-ui.tsx @@ -31,7 +31,6 @@ interface Prop { } export const ChatUI: FC = (props) => { - const { data: session } = useSession(); const [isUploadingFile, setIsUploadingFile] = useState(false); @@ -42,26 +41,20 @@ export const ChatUI: FC = (props) => { id: props.chatThread.id, chatType: props.chatThread.chatType, conversationStyle: props.chatThread.conversationStyle, - chatOverFileName: props.chatThread.chatOverFileName + chatOverFileName: props.chatThread.chatOverFileName, }); const { toast } = useToast(); const id = props.chatThread.id; - - const { - messages, - input, - handleInputChange, - handleSubmit, - reload, - isLoading, - } = useChat({ - onError, - id, - body: chatBody, - initialMessages: transformCosmosToAIModel(props.chats), - }); + + const { messages, input, setInput, handleSubmit, reload, isLoading } = + useChat({ + onError, + id, + body: chatBody, + initialMessages: transformCosmosToAIModel(props.chats), + }); const scrollRef = useRef(null); useChatScrollAnchor(messages, scrollRef); @@ -83,7 +76,7 @@ export const ChatUI: FC = (props) => { }); } - const onChatTypeChange = (value: ChatType) => { + const onChatTypeChange = async (value: ChatType) => { setBody((e) => ({ ...e, chatType: value })); }; @@ -144,9 +137,7 @@ export const ChatUI: FC = (props) => { const ChatWindow = (
- +
{messages.map((message, index) => ( @@ -184,7 +175,7 @@ export const ChatUI: FC = (props) => {
diff --git a/src/package-lock.json b/src/package-lock.json index 6b49ec1b1..e07ea5497 100644 --- a/src/package-lock.json +++ b/src/package-lock.json @@ -29,6 +29,7 @@ "eslint-config-next": "^13.4.12", "langchain": "^0.0.123", "lucide-react": "^0.264.0", + "microsoft-cognitiveservices-speech-sdk": "^1.31.0", "nanoid": "^4.0.2", "next": "^13.5.3", "next-auth": "^4.22.4", @@ -2251,6 +2252,27 @@ } ] }, + "node_modules/bent": { + "version": "7.3.12", + "resolved": "https://registry.npmjs.org/bent/-/bent-7.3.12.tgz", + "integrity": "sha512-T3yrKnVGB63zRuoco/7Ybl7BwwGZR0lceoVG5XmQyMIH9s19SV5m+a8qam4if0zQuAmOQTyPTPmsQBdAorGK3w==", + "dependencies": { + "bytesish": "^0.4.1", + "caseless": "~0.12.0", + "is-stream": "^2.0.0" + } + }, + "node_modules/bent/node_modules/is-stream": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-stream/-/is-stream-2.0.1.tgz", + "integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==", + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/big-integer": { "version": "1.6.51", "resolved": "https://registry.npmjs.org/big-integer/-/big-integer-1.6.51.tgz", @@ -2364,6 +2386,11 @@ "node": ">=10.16.0" } }, + "node_modules/bytesish": { + "version": "0.4.4", + "resolved": "https://registry.npmjs.org/bytesish/-/bytesish-0.4.4.tgz", + "integrity": "sha512-i4uu6M4zuMUiyfZN4RU2+i9+peJh//pXhd9x1oSe1LBkZ3LEbCoygu8W0bXTukU1Jme2txKuotpCZRaC3FLxcQ==" + }, "node_modules/call-bind": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.2.tgz", @@ -2422,6 +2449,11 @@ } ] }, + "node_modules/caseless": { + "version": "0.12.0", + "resolved": "https://registry.npmjs.org/caseless/-/caseless-0.12.0.tgz", + "integrity": "sha512-4tYFyifaFfGacoiObjJegolkwSU4xQNGbVgUiNYVUxbQ2x2lUsFvY4hVgVzGiIe6WLOPqycWXA40l+PWsxthUw==" + }, "node_modules/ccount": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/ccount/-/ccount-2.0.1.tgz", @@ -5994,6 +6026,50 @@ "node": ">=8.6" } }, + "node_modules/microsoft-cognitiveservices-speech-sdk": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/microsoft-cognitiveservices-speech-sdk/-/microsoft-cognitiveservices-speech-sdk-1.32.0.tgz", + "integrity": "sha512-TQqCIytCvW7x8MB2UT8DfyZkIjO34CSpy0zYlbQChkYWrYNzGgMIAA3uTGuYGj8hb0xMQBwRfqyAc5sA2VRgjQ==", + "dependencies": { + "agent-base": "^6.0.1", + "bent": "^7.3.12", + "https-proxy-agent": "^4.0.0", + "uuid": "^9.0.0", + "ws": "^7.5.6" + } + }, + "node_modules/microsoft-cognitiveservices-speech-sdk/node_modules/https-proxy-agent": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-4.0.0.tgz", + "integrity": "sha512-zoDhWrkR3of1l9QAL8/scJZyLu8j/gBkcwcaQOZh7Gyh/+uJQzGVETdgT30akuwkpL8HTRfssqI3BZuV18teDg==", + "dependencies": { + "agent-base": "5", + "debug": "4" + }, + "engines": { + "node": ">= 6.0.0" + } + }, + "node_modules/microsoft-cognitiveservices-speech-sdk/node_modules/https-proxy-agent/node_modules/agent-base": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-5.1.1.tgz", + "integrity": "sha512-TMeqbNl2fMW0nMjTEPOwe3J/PRFP4vqeoNuQMG0HlMrtm5QxKqdvAkZ1pRBQ/ulIyDD5Yq0nJ7YbdD8ey0TO3g==", + "engines": { + "node": ">= 6.0.0" + } + }, + "node_modules/microsoft-cognitiveservices-speech-sdk/node_modules/uuid": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", + "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "bin": { + "uuid": "dist/bin/uuid" + } + }, "node_modules/mime-db": { "version": "1.52.0", "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", @@ -8643,6 +8719,26 @@ "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==" }, + "node_modules/ws": { + "version": "7.5.9", + "resolved": "https://registry.npmjs.org/ws/-/ws-7.5.9.tgz", + "integrity": "sha512-F+P9Jil7UiSKSkppIiD94dN07AwvFixvLIj1Og1Rl9GGMuNipJnV9JzjD6XuqmAeiswGvUmNLjr5cFuXwNS77Q==", + "engines": { + "node": ">=8.3.0" + }, + "peerDependencies": { + "bufferutil": "^4.0.1", + "utf-8-validate": "^5.0.2" + }, + "peerDependenciesMeta": { + "bufferutil": { + "optional": true + }, + "utf-8-validate": { + "optional": true + } + } + }, "node_modules/xtend": { "version": "4.0.2", "resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.2.tgz", diff --git a/src/package.json b/src/package.json index 74d38260c..af48dec4a 100644 --- a/src/package.json +++ b/src/package.json @@ -30,6 +30,7 @@ "eslint-config-next": "^13.4.12", "langchain": "^0.0.123", "lucide-react": "^0.264.0", + "microsoft-cognitiveservices-speech-sdk": "^1.32.0", "nanoid": "^4.0.2", "next": "^13.5.3", "next-auth": "^4.22.4", diff --git a/src/type.ts b/src/type.ts index a70764712..175553759 100644 --- a/src/type.ts +++ b/src/type.ts @@ -21,7 +21,9 @@ const azureEnvVars = [ "NEXTAUTH_URL", "AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT", "AZURE_DOCUMENT_INTELLIGENCE_KEY", - "ADMIN_EMAIL_ADDRESS" + "ADMIN_EMAIL_ADDRESS", + "AZURE_SPEECH_REGION", + "AZURE_SPEECH_KEY", ] as const; type RequiredServerEnvKeys = (typeof azureEnvVars)[number]; From 2340039e93003b230f67bd6bc92c3c3da06fc2a7 Mon Sep 17 00:00:00 2001 From: thivy Date: Sat, 30 Sep 2023 19:02:14 +1000 Subject: [PATCH 02/13] add text to speech --- src/components/chat/chat-input.tsx | 6 +- src/features/chat/chat-speech/microphone.tsx | 22 +++++ .../chat/chat-speech/record-speech.tsx | 42 ++++++++++ .../chat/chat-speech/speech-context.tsx | 82 +++++++++++++++++++ src/features/chat/chat-speech/stop-speech.tsx | 23 ++++++ ...ch-button.tsx => use-speech-recognizer.ts} | 42 +--------- src/features/chat/chat-ui/chat-ui.tsx | 7 +- src/features/providers.tsx | 5 +- 8 files changed, 184 insertions(+), 45 deletions(-) create mode 100644 src/features/chat/chat-speech/microphone.tsx create mode 100644 src/features/chat/chat-speech/record-speech.tsx create mode 100644 src/features/chat/chat-speech/speech-context.tsx create mode 100644 src/features/chat/chat-speech/stop-speech.tsx rename src/features/chat/chat-speech/{speech-button.tsx => use-speech-recognizer.ts} (58%) diff --git a/src/components/chat/chat-input.tsx b/src/components/chat/chat-input.tsx index 88282ab35..9e7364639 100644 --- a/src/components/chat/chat-input.tsx +++ b/src/components/chat/chat-input.tsx @@ -1,4 +1,4 @@ -import { SpeechButton } from "@/features/chat/chat-speech/speech-button"; +import { Microphone } from "@/features/chat/chat-speech/microphone"; import { Loader, Send } from "lucide-react"; import { FC, FormEvent, useRef, useState } from "react"; import { Button } from "../ui/button"; @@ -15,7 +15,9 @@ const ChatInput: FC = (props) => { const buttonRef = useRef(null); const textAreaRef = useRef(null); const [rows, setRows] = useState(1); + const maxRows = 6; + const [keysPressed, setKeysPressed] = useState(new Set()); const onKeyDown = (event: React.KeyboardEvent) => { @@ -75,7 +77,7 @@ const ChatInput: FC = (props) => { onChange={onChange} >
- { textAreaRef.current!.value = text; diff --git a/src/features/chat/chat-speech/microphone.tsx b/src/features/chat/chat-speech/microphone.tsx new file mode 100644 index 000000000..8e406fe3b --- /dev/null +++ b/src/features/chat/chat-speech/microphone.tsx @@ -0,0 +1,22 @@ +import { FC } from "react"; +import { RecordSpeech } from "./record-speech"; +import { useSpeechContext } from "./speech-context"; +import { StopSpeech } from "./stop-speech"; + +interface MicrophoneProps { + disabled: boolean; + onSpeech: (text: string) => void; +} + +export const Microphone: FC = (props) => { + const { isPlaying } = useSpeechContext(); + return ( + <> + {isPlaying ? ( + + ) : ( + + )} + + ); +}; diff --git a/src/features/chat/chat-speech/record-speech.tsx b/src/features/chat/chat-speech/record-speech.tsx new file mode 100644 index 000000000..46698c536 --- /dev/null +++ b/src/features/chat/chat-speech/record-speech.tsx @@ -0,0 +1,42 @@ +import { Button } from "@/components/ui/button"; +import { Mic } from "lucide-react"; +import { FC, useState } from "react"; +import { useSpeechRecognizer } from "./use-speech-recognizer"; + +interface Prop { + onSpeech: (text: string) => void; + disabled: boolean; +} + +export const RecordSpeech: FC = (props) => { + const [isPressed, setIsPressed] = useState(false); + + const { startRecognition, stopRecognition } = useSpeechRecognizer({ + onSpeech: props.onSpeech, + }); + + const handleMouseDown = async () => { + await startRecognition(); + setIsPressed(true); + }; + + const handleMouseUp = () => { + stopRecognition(); + setIsPressed(false); + }; + + return ( + + ); +}; diff --git a/src/features/chat/chat-speech/speech-context.tsx b/src/features/chat/chat-speech/speech-context.tsx new file mode 100644 index 000000000..834fd0fd9 --- /dev/null +++ b/src/features/chat/chat-speech/speech-context.tsx @@ -0,0 +1,82 @@ +import { + AudioConfig, + ResultReason, + SpeakerAudioDestination, + SpeechConfig, + SpeechSynthesizer, +} from "microsoft-cognitiveservices-speech-sdk"; +import React, { createContext, useRef, useState } from "react"; +import { GetSpeechToken } from "./speech-service"; + +interface SpeechContextProps { + textToSpeech: (textToSpeak: string) => Promise; + stopPlaying: () => void; + isPlaying: boolean; +} + +const SpeechContext = createContext(null); + +export const SpeechProvider = ({ children }: { children: React.ReactNode }) => { + const [isPlaying, setIsPlaying] = useState(false); + const playerRef = useRef(); + + const stopPlaying = () => { + setIsPlaying(false); + if (playerRef.current) { + playerRef.current.pause(); + } + }; + + const textToSpeech = async (textToSpeak: string) => { + if (isPlaying) { + stopPlaying(); + } + + const tokenObj = await GetSpeechToken(); + const speechConfig = SpeechConfig.fromAuthorizationToken( + tokenObj.token, + tokenObj.region + ); + playerRef.current = new SpeakerAudioDestination(); + + var audioConfig = AudioConfig.fromSpeakerOutput(playerRef.current); + let synthesizer = new SpeechSynthesizer(speechConfig, audioConfig); + + playerRef.current.onAudioEnd = () => { + setIsPlaying(false); + }; + + synthesizer.speakTextAsync( + textToSpeak, + (result) => { + if (result.reason === ResultReason.SynthesizingAudioCompleted) { + console.log("synthesis finished."); + setIsPlaying(true); + } else { + console.error("Speech synthesis canceled, " + result.errorDetails); + setIsPlaying(false); + } + synthesizer.close(); + }, + function (err) { + console.log("err - " + err); + synthesizer.close(); + } + ); + }; + + return ( + + {children} + + ); +}; + +export const useSpeechContext = () => { + const context = React.useContext(SpeechContext); + if (!context) { + throw new Error("SpeechContext is null"); + } + + return context; +}; diff --git a/src/features/chat/chat-speech/stop-speech.tsx b/src/features/chat/chat-speech/stop-speech.tsx new file mode 100644 index 000000000..2929ae546 --- /dev/null +++ b/src/features/chat/chat-speech/stop-speech.tsx @@ -0,0 +1,23 @@ +import { Button } from "@/components/ui/button"; +import { Square } from "lucide-react"; +import { FC } from "react"; +import { useSpeechContext } from "./speech-context"; + +interface StopButtonProps { + disabled: boolean; +} + +export const StopSpeech: FC = (props) => { + const { stopPlaying } = useSpeechContext(); + return ( + + ); +}; diff --git a/src/features/chat/chat-speech/speech-button.tsx b/src/features/chat/chat-speech/use-speech-recognizer.ts similarity index 58% rename from src/features/chat/chat-speech/speech-button.tsx rename to src/features/chat/chat-speech/use-speech-recognizer.ts index e082b96c8..25d25e9a3 100644 --- a/src/features/chat/chat-speech/speech-button.tsx +++ b/src/features/chat/chat-speech/use-speech-recognizer.ts @@ -1,52 +1,12 @@ -import { Button } from "@/components/ui/button"; -import { Mic } from "lucide-react"; import { AudioConfig, AutoDetectSourceLanguageConfig, SpeechConfig, SpeechRecognizer, } from "microsoft-cognitiveservices-speech-sdk"; -import { FC, useRef, useState } from "react"; +import { useRef } from "react"; import { GetSpeechToken } from "./speech-service"; -interface Prop { - onSpeech: (text: string) => void; - disabled: boolean; -} - -export const SpeechButton: FC = (props) => { - const [isPressed, setIsPressed] = useState(false); - - const { startRecognition, stopRecognition } = useSpeechRecognizer({ - onSpeech: props.onSpeech, - }); - - const handleMouseDown = async () => { - await startRecognition(); - setIsPressed(true); - }; - - const handleMouseUp = () => { - stopRecognition(); - setIsPressed(false); - }; - - return ( - - ); -}; - interface SpeechRecognizerProps { onSpeech: (text: string) => void; } diff --git a/src/features/chat/chat-ui/chat-ui.tsx b/src/features/chat/chat-ui/chat-ui.tsx index d280dd0ee..a30cf7dbd 100644 --- a/src/features/chat/chat-ui/chat-ui.tsx +++ b/src/features/chat/chat-ui/chat-ui.tsx @@ -7,6 +7,7 @@ import { useChatScrollAnchor } from "@/components/hooks/use-chat-scroll-anchor"; import { ToastAction } from "@/components/ui/toast"; import { useToast } from "@/components/ui/use-toast"; import { AI_NAME } from "@/features/theme/customise"; +import { Message } from "ai"; import { useChat } from "ai/react"; import { useSession } from "next-auth/react"; import { FC, FormEvent, useRef, useState } from "react"; @@ -22,6 +23,7 @@ import { PromptGPTBody, } from "../chat-services/models"; import { transformCosmosToAIModel } from "../chat-services/utils"; +import { useSpeechContext } from "../chat-speech/speech-context"; import { EmptyState } from "./chat-empty-state"; import { ChatHeader } from "./chat-header"; @@ -45,7 +47,7 @@ export const ChatUI: FC = (props) => { }); const { toast } = useToast(); - + const { textToSpeech } = useSpeechContext(); const id = props.chatThread.id; const { messages, input, setInput, handleSubmit, reload, isLoading } = @@ -54,6 +56,9 @@ export const ChatUI: FC = (props) => { id, body: chatBody, initialMessages: transformCosmosToAIModel(props.chats), + onFinish: async (lastMessage: Message) => { + await textToSpeech(lastMessage.content); + }, }); const scrollRef = useRef(null); diff --git a/src/features/providers.tsx b/src/features/providers.tsx index ff3224b19..eda17b191 100644 --- a/src/features/providers.tsx +++ b/src/features/providers.tsx @@ -1,12 +1,15 @@ "use client"; import { SessionProvider } from "next-auth/react"; +import { SpeechProvider } from "./chat/chat-speech/speech-context"; import { MenuProvider } from "./menu/menu-context"; export const Providers = ({ children }: { children: React.ReactNode }) => { return ( - {children} + + {children} + ); }; From b64a1b8336846642da3c696d8cdc7c154e5550c3 Mon Sep 17 00:00:00 2001 From: thivy Date: Sat, 30 Sep 2023 19:21:20 +1000 Subject: [PATCH 03/13] code refactor --- src/components/chat/chat-input.tsx | 16 ++-- src/features/chat/chat-speech/microphone.tsx | 3 +- .../chat/chat-speech/record-speech.tsx | 7 +- .../chat/chat-speech/speech-context.tsx | 75 +++++-------------- .../chat/chat-speech/use-speech-recognizer.ts | 17 +++-- .../chat-speech/use-speech-synthesizer.ts | 61 +++++++++++++++ 6 files changed, 101 insertions(+), 78 deletions(-) create mode 100644 src/features/chat/chat-speech/use-speech-synthesizer.ts diff --git a/src/components/chat/chat-input.tsx b/src/components/chat/chat-input.tsx index 9e7364639..a1e552674 100644 --- a/src/components/chat/chat-input.tsx +++ b/src/components/chat/chat-input.tsx @@ -1,4 +1,5 @@ import { Microphone } from "@/features/chat/chat-speech/microphone"; +import { useSpeechContext } from "@/features/chat/chat-speech/speech-context"; import { Loader, Send } from "lucide-react"; import { FC, FormEvent, useRef, useState } from "react"; import { Button } from "../ui/button"; @@ -20,6 +21,8 @@ const ChatInput: FC = (props) => { const [keysPressed, setKeysPressed] = useState(new Set()); + const { onSpeech } = useSpeechContext(); + const onKeyDown = (event: React.KeyboardEvent) => { setKeysPressed(keysPressed.add(event.key)); @@ -60,6 +63,11 @@ const ChatInput: FC = (props) => { } }; + onSpeech((text) => { + textAreaRef.current!.value = text; + props.setInput(text); + }); + return (
= (props) => { onChange={onChange} >
- { - textAreaRef.current!.value = text; - props.setInput(text); - }} - /> + -

{props.uploadButtonLabel}

+

{uploadButtonLabel}

)} diff --git a/src/features/chat/chat-ui/chat-style-selector.tsx b/src/features/chat/chat-ui/chat-style-selector.tsx index 788c6bde4..661e57e37 100644 --- a/src/features/chat/chat-ui/chat-style-selector.tsx +++ b/src/features/chat/chat-ui/chat-style-selector.tsx @@ -2,22 +2,23 @@ import { Tabs, TabsList, TabsTrigger } from "@/components/ui/tabs"; import { Brush, CircleDot, Scale } from "lucide-react"; import { FC } from "react"; import { ConversationStyle } from "../chat-services/models"; +import { useChatContext } from "./chat-context"; interface Prop { disable: boolean; - conversationStyle: ConversationStyle; - onChatStyleChange?: (value: ConversationStyle) => void; } export const ChatStyleSelector: FC = (props) => { + const { setChatBody, chatBody } = useChatContext(); + + const onChange = (value: ConversationStyle) => { + setChatBody({ ...chatBody, conversationStyle: value }); + }; + return ( - props.onChatStyleChange - ? props.onChatStyleChange(value as ConversationStyle) - : null - } + defaultValue={chatBody.conversationStyle} + onValueChange={(value) => onChange(value as ConversationStyle)} > void; } export const ChatTypeSelector: FC = (props) => { + const { setChatBody, chatBody, fileState } = useChatContext(); + + const { setShowFileUpload, setIsFileNull } = fileState; + + const onChange = (value: ChatType) => { + setShowFileUpload(value); + setIsFileNull(true); + setChatBody({ ...chatBody, chatType: value }); + }; + return ( - props.onChatTypeChange - ? props.onChatTypeChange(value as ChatType) - : null - } + defaultValue={chatBody.chatType} + onValueChange={(value) => onChange(value as ChatType)} > = (props) => { > File - {/* - Database - */} ); diff --git a/src/features/chat/chat-ui/chat-ui.tsx b/src/features/chat/chat-ui/chat-ui.tsx index 77131914d..6bac22cb5 100644 --- a/src/features/chat/chat-ui/chat-ui.tsx +++ b/src/features/chat/chat-ui/chat-ui.tsx @@ -1,180 +1,25 @@ "use client"; import ChatInput from "@/components/chat/chat-input"; -import ChatLoading from "@/components/chat/chat-loading"; -import ChatRow from "@/components/chat/chat-row"; -import { useChatScrollAnchor } from "@/components/hooks/use-chat-scroll-anchor"; -import { useToast } from "@/components/ui/use-toast"; -import { useGlobalErrorContext } from "@/features/global-error/global-error-context"; -import { AI_NAME } from "@/features/theme/customise"; -import { Message } from "ai"; -import { useChat } from "ai/react"; -import { useSession } from "next-auth/react"; -import { FC, FormEvent, useRef, useState } from "react"; -import { - IndexDocuments, - UploadDocument, -} from "../chat-services/chat-document-service"; -import { - ChatMessageModel, - ChatThreadModel, - ChatType, - ConversationStyle, - PromptGPTBody, -} from "../chat-services/models"; -import { transformCosmosToAIModel } from "../chat-services/utils"; -import { useSpeechContext } from "../chat-speech/speech-context"; -import { EmptyState } from "./chat-empty-state"; -import { ChatHeader } from "./chat-header"; +import { FC } from "react"; +import { useChatContext } from "./chat-context"; +import { ChatMessageContainer } from "./chat-message-container"; +import { ChatMessageEmptyState } from "./chat-message-empty-state"; -interface Prop { - chats: Array; - chatThread: ChatThreadModel; -} +interface Prop {} -export const ChatUI: FC = (props) => { - const { data: session } = useSession(); - - const [isUploadingFile, setIsUploadingFile] = useState(false); - - const [uploadButtonLabel, setUploadButtonLabel] = useState(""); - - const [chatBody, setBody] = useState({ - id: props.chatThread.id, - chatType: props.chatThread.chatType, - conversationStyle: props.chatThread.conversationStyle, - chatOverFileName: props.chatThread.chatOverFileName, - }); - - const { toast } = useToast(); - const { textToSpeech, isMicrophoneUsed, resetMicrophoneUsed } = - useSpeechContext(); - const { showError } = useGlobalErrorContext(); - const id = props.chatThread.id; - - const { messages, input, setInput, handleSubmit, reload, isLoading } = - useChat({ - onError, - id, - body: chatBody, - initialMessages: transformCosmosToAIModel(props.chats), - onFinish: async (lastMessage: Message) => { - if (isMicrophoneUsed) { - await textToSpeech(lastMessage.content); - resetMicrophoneUsed(); - } - }, - }); - - const scrollRef = useRef(null); - useChatScrollAnchor(messages, scrollRef); - - function onError(error: Error) { - showError(error.message, reload); - } - - const onChatTypeChange = async (value: ChatType) => { - setBody((e) => ({ ...e, chatType: value })); - }; - - const onConversationStyleChange = (value: ConversationStyle) => { - setBody((e) => ({ ...e, conversationStyle: value })); - }; - - const onHandleSubmit = (e: FormEvent) => { - handleSubmit(e); - }; - - const onFileChange = async (formData: FormData) => { - try { - setIsUploadingFile(true); - setUploadButtonLabel("Uploading document..."); - formData.append("id", props.chatThread.id); - const file: File | null = formData.get("file") as unknown as File; - const uploadResponse = await UploadDocument(formData); - - if (uploadResponse.success) { - setUploadButtonLabel("Indexing document..."); - const indexResponse = await IndexDocuments( - file.name, - uploadResponse.response, - props.chatThread.id - ); - - if (indexResponse.success) { - toast({ - title: "File upload", - description: `${file.name} uploaded successfully.`, - }); - setUploadButtonLabel(""); - setBody((e) => ({ ...e, chatOverFileName: file.name })); - } else { - toast({ - variant: "destructive", - description: indexResponse.error, - }); - } - } else { - toast({ - variant: "destructive", - description: "" + uploadResponse.error, - }); - } - } catch (error) { - toast({ - variant: "destructive", - description: "" + error, - }); - } finally { - setIsUploadingFile(false); - setUploadButtonLabel(""); - } - }; - - const ChatWindow = ( -
-
- -
-
- {messages.map((message, index) => ( - - ))} - {isLoading && } -
-
- ); +export const ChatUI: FC = () => { + const { messages } = useChatContext(); return (
{messages.length !== 0 ? ( - ChatWindow + ) : ( - + )} - +
); }; diff --git a/src/features/global-error/global-error-context.tsx b/src/features/global-message/global-message-context.tsx similarity index 60% rename from src/features/global-error/global-error-context.tsx rename to src/features/global-message/global-message-context.tsx index b31c0cae1..ec590b9d5 100644 --- a/src/features/global-error/global-error-context.tsx +++ b/src/features/global-message/global-message-context.tsx @@ -2,13 +2,19 @@ import { toast } from "@/components/ui/use-toast"; import { ToastAction } from "@radix-ui/react-toast"; import { createContext, useContext } from "react"; -interface GlobalErrorProps { +interface GlobalMessageProps { showError: (error: string, reload?: () => void) => void; + showSuccess: (message: MessageProp) => void; } -const GlobalErrorContext = createContext(null); +const GlobalMessageContext = createContext(null); -export const GlobalErrorProvider = ({ +interface MessageProp { + title: string; + description: string; +} + +export const GlobalMessageProvider = ({ children, }: { children: React.ReactNode; @@ -30,19 +36,24 @@ export const GlobalErrorProvider = ({ }); }; + const showSuccess = (message: MessageProp) => { + toast(message); + }; + return ( - {children} - + ); }; -export const useGlobalErrorContext = () => { - const context = useContext(GlobalErrorContext); +export const useGlobalMessageContext = () => { + const context = useContext(GlobalMessageContext); if (!context) { throw new Error("GlobalErrorContext is null"); } diff --git a/src/features/providers.tsx b/src/features/providers.tsx index 643cd2f06..1f72a6e9b 100644 --- a/src/features/providers.tsx +++ b/src/features/providers.tsx @@ -2,17 +2,17 @@ import { SessionProvider } from "next-auth/react"; import { SpeechProvider } from "./chat/chat-speech/speech-context"; -import { GlobalErrorProvider } from "./global-error/global-error-context"; +import { GlobalMessageProvider } from "./global-message/global-message-context"; import { MenuProvider } from "./menu/menu-context"; export const Providers = ({ children }: { children: React.ReactNode }) => { return ( - + {children} - + ); }; From 3c24cc0db366e7274c72c22e1a12eb7cd8366b55 Mon Sep 17 00:00:00 2001 From: thivy Date: Tue, 3 Oct 2023 10:55:02 +1100 Subject: [PATCH 09/13] fix document end point and update package --- infra/resources.bicep | 2 +- .../chat/chat-services/chat-document-service.ts | 7 +++---- src/features/chat/chat-speech/speech-service.ts | 1 + src/package-lock.json | 10 +++++----- src/package.json | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/infra/resources.bicep b/infra/resources.bicep index c61d63eb5..b7593ecad 100644 --- a/infra/resources.bicep +++ b/infra/resources.bicep @@ -127,7 +127,7 @@ resource webApp 'Microsoft.Web/sites@2020-06-01' = { } { name: 'AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT' - value: 'https://${location}.api.cognitive.microsoft.com/' + value: 'https://${form_recognizer_name}.cognitiveservices.azure.com/' } { name: 'SCM_DO_BUILD_DURING_DEPLOYMENT' diff --git a/src/features/chat/chat-services/chat-document-service.ts b/src/features/chat/chat-services/chat-document-service.ts index bb28992bd..dc549a48c 100644 --- a/src/features/chat/chat-services/chat-document-service.ts +++ b/src/features/chat/chat-services/chat-document-service.ts @@ -121,6 +121,7 @@ export const IndexDocuments = async ( ): Promise> => { try { const vectorStore = initAzureSearchVectorStore(); + const documentsToIndex: FaqDocumentIndex[] = []; let index = 0; for (const doc of docs) { @@ -138,6 +139,7 @@ export const IndexDocuments = async ( } await vectorStore.addDocuments(documentsToIndex); + await UpsertChatDocument(fileName, chatThreadId); return { success: true, @@ -169,10 +171,7 @@ export const initAzureSearchVectorStore = () => { export const initDocumentIntelligence = () => { const client = new DocumentAnalysisClient( process.env.AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT, - new AzureKeyCredential(process.env.AZURE_DOCUMENT_INTELLIGENCE_KEY), - { - apiVersion: "2022-08-31", - } + new AzureKeyCredential(process.env.AZURE_DOCUMENT_INTELLIGENCE_KEY) ); return client; diff --git a/src/features/chat/chat-speech/speech-service.ts b/src/features/chat/chat-speech/speech-service.ts index 35d22b425..a3654dc75 100644 --- a/src/features/chat/chat-speech/speech-service.ts +++ b/src/features/chat/chat-speech/speech-service.ts @@ -8,6 +8,7 @@ export const GetSpeechToken = async () => { headers: { "Ocp-Apim-Subscription-Key": process.env.AZURE_SPEECH_KEY!, }, + cache: "no-store", } ); diff --git a/src/package-lock.json b/src/package-lock.json index e07ea5497..781357b63 100644 --- a/src/package-lock.json +++ b/src/package-lock.json @@ -8,7 +8,7 @@ "name": "azure-open-ai-accelerator", "version": "0.1.0", "dependencies": { - "@azure/ai-form-recognizer": "^4.1.0-beta.1", + "@azure/ai-form-recognizer": "^5.0.0", "@azure/cosmos": "^3.17.3", "@azure/identity": "^3.2.4", "@radix-ui/react-avatar": "^1.0.3", @@ -29,7 +29,7 @@ "eslint-config-next": "^13.4.12", "langchain": "^0.0.123", "lucide-react": "^0.264.0", - "microsoft-cognitiveservices-speech-sdk": "^1.31.0", + "microsoft-cognitiveservices-speech-sdk": "^1.32.0", "nanoid": "^4.0.2", "next": "^13.5.3", "next-auth": "^4.22.4", @@ -114,9 +114,9 @@ } }, "node_modules/@azure/ai-form-recognizer": { - "version": "4.1.0-beta.1", - "resolved": "https://registry.npmjs.org/@azure/ai-form-recognizer/-/ai-form-recognizer-4.1.0-beta.1.tgz", - "integrity": "sha512-p2MdiP8kGLZGpvN/DnKdsMW+rKpXPmivp164ZYCAibaoC0MAqxY3JdQeZY8mgseQ9CuJ97qOFsvxPke7dOxpCg==", + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/@azure/ai-form-recognizer/-/ai-form-recognizer-5.0.0.tgz", + "integrity": "sha512-emWirkH87Oj5adkHBxcOUwxPhRxWL/lV1Kjo+0ujhZZ7J9CTruDbKvxWRihknDt55iEml3Qov2yTykpUtPWN2g==", "dependencies": { "@azure/abort-controller": "^1.0.0", "@azure/core-auth": "^1.3.0", diff --git a/src/package.json b/src/package.json index af48dec4a..de7dca943 100644 --- a/src/package.json +++ b/src/package.json @@ -9,7 +9,7 @@ "lint": "next lint" }, "dependencies": { - "@azure/ai-form-recognizer": "^4.1.0-beta.1", + "@azure/ai-form-recognizer": "^5.0.0", "@azure/cosmos": "^3.17.3", "@azure/identity": "^3.2.4", "@radix-ui/react-avatar": "^1.0.3", From a408b97c5179f7fc655df8e0259d3668e4e51d92 Mon Sep 17 00:00:00 2001 From: thivy Date: Tue, 3 Oct 2023 11:17:56 +1100 Subject: [PATCH 10/13] update documentation and bicep for azure speech --- docs/7-environment-variables.md | 6 ++++-- infra/resources.bicep | 32 ++++++++++++++++++++++++++++++++ src/.env.example | 8 ++++++-- 3 files changed, 42 insertions(+), 4 deletions(-) diff --git a/docs/7-environment-variables.md b/docs/7-environment-variables.md index 6a4fd4a03..0accbe5b8 100644 --- a/docs/7-environment-variables.md +++ b/docs/7-environment-variables.md @@ -23,5 +23,7 @@ Below are the required environment variables, to be added to the Azure Portal or | `AZURE_SEARCH_NAME` | `https://AZURE_SEARCH_NAME.search.windows.net` | The deployment name of your Azure Cognitive Search | | `AZURE_SEARCH_INDEX_NAME` | | The index name with [vector search](https://learn.microsoft.com/en-us/azure/search/vector-search-overview) enabled | | `AZURE_SEARCH_API_VERSION` | `2023-07-01-Preview` | API version which supports vector search `2023-07-01-Preview` | -| `AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT` | `https://REGION.api.cognitive.microsoft.com/` | Endpoint url of the Azure document intelligence. The REGION is specific to your Azure resource location | -| `AZURE_DOCUMENT_INTELLIGENCE_KEY` | | API keys of your Azure Document intelligence resource | +| `AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT` | `https://NAME.api.cognitive.microsoft.com/` | Endpoint url of the Azure document intelligence. The REGION is specific to your Azure resource location | +| `AZURE_SPEECH_REGION` | australiaeast | Region of your Azure Speech service | +| `AZURE_SPEECH_KEY` | | API Key of Azure Speech service | +| | diff --git a/infra/resources.bicep b/infra/resources.bicep index b7593ecad..506405f53 100644 --- a/infra/resources.bicep +++ b/infra/resources.bicep @@ -13,6 +13,7 @@ param embeddingDeploymentName string = 'text-embedding-ada-002' param embeddingDeploymentCapacity int = 30 param embeddingModelName string = 'text-embedding-ada-002' +param speechServiceSkuName string = 'S0' param formRecognizerSkuName string = 'S0' param searchServiceSkuName string = 'standard' param searchServiceIndexName string = 'azure-chat' @@ -27,6 +28,7 @@ param tags object = {} var openai_name = toLower('${name}ai${resourceToken}') var form_recognizer_name = toLower('${name}-form-${resourceToken}') +var speech_service_name = toLower('${name}-speech-${resourceToken}') var cosmos_name = toLower('${name}-cosmos-${resourceToken}') var search_name = toLower('${name}search${resourceToken}') var webapp_name = toLower('${name}-webapp-${resourceToken}') @@ -161,6 +163,14 @@ resource webApp 'Microsoft.Web/sites@2020-06-01' = { name: 'NEXTAUTH_URL' value: 'https://${webapp_name}.azurewebsites.net' } + { + name: 'AZURE_SPEECH_REGION' + value: resourceGroup().location + } + { + name: 'AZURE_SPEECH_KEY' + value: '@Microsoft.KeyVault(VaultName=${kv.name};SecretName=${kv::AZURE_SPEECH_KEY.name})' + } ] } } @@ -236,6 +246,15 @@ resource kv 'Microsoft.KeyVault/vaults@2021-06-01-preview' = { } } + resource AZURE_SPEECH_KEY 'secrets' = { + name: 'AZURE-SPEECH-KEY' + properties: { + contentType: 'text/plain' + value: speechService.listKeys().key1 + } + } + + resource AZURE_SEARCH_API_KEY 'secrets' = { name: 'AZURE-SEARCH-API-KEY' properties: { @@ -351,5 +370,18 @@ resource deployment 'Microsoft.CognitiveServices/accounts/deployments@2023-05-01 } }] +resource speechService 'Microsoft.CognitiveServices/accounts@2023-05-01' = { + name: speech_service_name + location: location + tags: tags + kind: 'SpeechServices' + properties: { + customSubDomainName: speech_service_name + publicNetworkAccess: 'Enabled' + } + sku: { + name: speechServiceSkuName + } +} output url string = 'https://${webApp.properties.defaultHostName}' diff --git a/src/.env.example b/src/.env.example index 11443c23e..6a9d2ab8d 100644 --- a/src/.env.example +++ b/src/.env.example @@ -38,5 +38,9 @@ AZURE_SEARCH_INDEX_NAME= AZURE_SEARCH_API_VERSION="2023-07-01-Preview" # Azure AI Document Intelligence to extract content from your data -AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT="https://REGION.api.cognitive.microsoft.com/" -AZURE_DOCUMENT_INTELLIGENCE_KEY= \ No newline at end of file +AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT="https://NAME.api.cognitive.microsoft.com/" +AZURE_DOCUMENT_INTELLIGENCE_KEY= + +# Azure Speech to Text to convert audio to text +AZURE_SPEECH_REGION="" +AZURE_SPEECH_KEY="" \ No newline at end of file From a726daac217e73ec9b1a16a3307b780933ec4484 Mon Sep 17 00:00:00 2001 From: thivy Date: Tue, 3 Oct 2023 11:22:18 +1100 Subject: [PATCH 11/13] fix nextjs cache --- .../azure-cog-vector-store.ts | 35 +++++++++++-------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/src/features/langchain/vector-stores/azure-cog-search/azure-cog-vector-store.ts b/src/features/langchain/vector-stores/azure-cog-search/azure-cog-vector-store.ts index 767c396b0..48f17d2b7 100644 --- a/src/features/langchain/vector-stores/azure-cog-search/azure-cog-vector-store.ts +++ b/src/features/langchain/vector-stores/azure-cog-search/azure-cog-vector-store.ts @@ -35,8 +35,7 @@ type DocumentDeleteModel = { "@search.action": "delete"; }; - -export interface AzureCogDocument extends Record { } +export interface AzureCogDocument extends Record {} type AzureCogVectorField = { value: number[]; @@ -91,26 +90,33 @@ export class AzureCogSearch< } async deleteDocuments(chatThreadId: string): Promise { - // find all documents for chat thread - const documentsInChat = await this.fetcher(`${this.baseUrl}?api-version=${this._config.apiVersion}&search=${chatThreadId}&searchFields=chatThreadId&$select=id`, { - method: "GET", - body: null - }); + const documentsInChat = await this.fetcher( + `${this.baseUrl}?api-version=${this._config.apiVersion}&search=${chatThreadId}&searchFields=chatThreadId&$select=id`, + { + method: "GET", + body: null, + } + ); const documentsToDelete: DocumentDeleteModel[] = []; - documentsInChat.value.forEach(async (document: { id: string; }) => { - const doc: DocumentDeleteModel = {"@search.action": "delete", id: document.id}; + documentsInChat.value.forEach(async (document: { id: string }) => { + const doc: DocumentDeleteModel = { + "@search.action": "delete", + id: document.id, + }; documentsToDelete.push(doc); }); // delete the documents - const responseObj = await this.fetcher(`${this.baseUrl}/index?api-version=${this._config.apiVersion}`, { - method: "POST", - body: JSON.stringify({value: documentsToDelete}), - }); - + const responseObj = await this.fetcher( + `${this.baseUrl}/index?api-version=${this._config.apiVersion}`, + { + method: "POST", + body: JSON.stringify({ value: documentsToDelete }), + } + ); } /** * Search for the most similar documents to a query @@ -223,6 +229,7 @@ export class AzureCogSearch< "Content-Type": "application/json", "api-key": this._config.apiKey, }, + cache: "no-cache", }); if (!response.ok) { From 8f1ccea63991077fb99337a0593227e1163eb767 Mon Sep 17 00:00:00 2001 From: thivy Date: Tue, 3 Oct 2023 12:11:41 +1100 Subject: [PATCH 12/13] add check for speech config --- .../chat/chat-speech/speech-service.ts | 12 ++++ .../chat/chat-speech/use-speech-recognizer.ts | 71 ++++++++++--------- .../chat-speech/use-speech-synthesizer.ts | 6 ++ 3 files changed, 55 insertions(+), 34 deletions(-) diff --git a/src/features/chat/chat-speech/speech-service.ts b/src/features/chat/chat-speech/speech-service.ts index a3654dc75..16f320209 100644 --- a/src/features/chat/chat-speech/speech-service.ts +++ b/src/features/chat/chat-speech/speech-service.ts @@ -1,6 +1,18 @@ "use server"; export const GetSpeechToken = async () => { + if ( + process.env.AZURE_SPEECH_REGION === undefined || + process.env.AZURE_SPEECH_KEY === undefined + ) { + return { + error: true, + errorMessage: "Missing Azure Speech credentials", + token: "", + region: "", + }; + } + const response = await fetch( `https://${process.env.AZURE_SPEECH_REGION}.api.cognitive.microsoft.com/sts/v1.0/issueToken`, { diff --git a/src/features/chat/chat-speech/use-speech-recognizer.ts b/src/features/chat/chat-speech/use-speech-recognizer.ts index 15a3fafeb..c9531f91a 100644 --- a/src/features/chat/chat-speech/use-speech-recognizer.ts +++ b/src/features/chat/chat-speech/use-speech-recognizer.ts @@ -19,41 +19,44 @@ export const useSpeechRecognizer = () => { const startRecognition = async () => { const token = await GetSpeechToken(); - if (!token.error) { - setIsMicrophoneUsed(true); - const speechConfig = SpeechConfig.fromAuthorizationToken( - token.token, - token.region - ); - - const audioConfig = AudioConfig.fromDefaultMicrophoneInput(); - - const autoDetectSourceLanguageConfig = - AutoDetectSourceLanguageConfig.fromLanguages([ - "en-US", - "zh-CN", - "it-IT", - "pt-BR", - ]); - - const recognizer = SpeechRecognizer.FromConfig( - speechConfig, - autoDetectSourceLanguageConfig, - audioConfig - ); - - recognizerRef.current = recognizer; - - recognizer.recognizing = (s, e) => { - setSpeech(e.result.text); - }; - - recognizer.canceled = (s, e) => { - showError(e.errorDetails); - }; - - recognizer.startContinuousRecognitionAsync(); + if (token.error) { + showError(token.errorMessage); + return; } + + setIsMicrophoneUsed(true); + const speechConfig = SpeechConfig.fromAuthorizationToken( + token.token, + token.region + ); + + const audioConfig = AudioConfig.fromDefaultMicrophoneInput(); + + const autoDetectSourceLanguageConfig = + AutoDetectSourceLanguageConfig.fromLanguages([ + "en-US", + "zh-CN", + "it-IT", + "pt-BR", + ]); + + const recognizer = SpeechRecognizer.FromConfig( + speechConfig, + autoDetectSourceLanguageConfig, + audioConfig + ); + + recognizerRef.current = recognizer; + + recognizer.recognizing = (s, e) => { + setSpeech(e.result.text); + }; + + recognizer.canceled = (s, e) => { + showError(e.errorDetails); + }; + + recognizer.startContinuousRecognitionAsync(); }; const setSpeechText = (text: string) => { diff --git a/src/features/chat/chat-speech/use-speech-synthesizer.ts b/src/features/chat/chat-speech/use-speech-synthesizer.ts index 566b3b79a..0bc1f03fd 100644 --- a/src/features/chat/chat-speech/use-speech-synthesizer.ts +++ b/src/features/chat/chat-speech/use-speech-synthesizer.ts @@ -28,6 +28,12 @@ export const useSpeechSynthesizer = () => { } const tokenObj = await GetSpeechToken(); + + if (tokenObj.error) { + showError(tokenObj.errorMessage); + return; + } + const speechConfig = SpeechConfig.fromAuthorizationToken( tokenObj.token, tokenObj.region From 17d4be4accb0f630008028e3873d7a5206162fec Mon Sep 17 00:00:00 2001 From: thivy Date: Tue, 3 Oct 2023 12:13:25 +1100 Subject: [PATCH 13/13] fix alignment --- src/features/menu/menu.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/features/menu/menu.tsx b/src/features/menu/menu.tsx index 8b8cfb665..fe6a046be 100644 --- a/src/features/menu/menu.tsx +++ b/src/features/menu/menu.tsx @@ -59,7 +59,7 @@ export const MainMenu = () => { <> )}
-
+