Add greeting; Add setting of tts provider

zoollcar · Nov 19, 2024 · 107b378 · 107b378
1 parent e82460b
commit 107b378
Show file tree

Hide file tree

Showing 12 changed files with 635 additions and 43 deletions.
diff --git a/backend/.env.local.example b/backend/.env.local.example
@@ -1,3 +1,3 @@
 PORT=61234
 openai_endpoint="http://127.0.0.1:11434"
-cors_allowed_origins="https://you_domain"
+cors_allowed_origins='["http://localhost:5173"]'
diff --git a/backend/index.js b/backend/index.js
@@ -8,7 +8,7 @@ const port = process.env.PORT || 61234;
 
 app.use(express.json());
 var corsOptions = {
-  origin: process.env.cors_allowed_origins,
+  origin: JSON.parse(process.env.cors_allowed_origins),
   optionsSuccessStatus: 200 // some legacy browsers (IE11, various SmartTVs) choke on 204
 }
 app.use(cors(corsOptions));

diff --git a/src/App.tsx b/src/App.tsx
@@ -12,6 +12,7 @@ import {
   useOpenaiEndpoint,
   useOpenaiModelName,
   useUseBackendLLM,
+  useUseBackendTTS,
   useUseWebLLM,
 } from "./models/appstore.ts";
 import Debug from "./components/debug.tsx";
@@ -24,7 +25,12 @@ import Setting from "./components/setting.tsx";
 import { useSpeechRecognition } from "react-speech-recognition";
 import LLMChatWebLLM from "./models/llm/LLMChatWebLLM.ts";
 import { ChatCompletionChunk } from "@mlc-ai/web-llm";
-import { textToSpeech } from "./models/tts/textToSpeech.ts";
+import { defaultContext, promptHint } from "./models/prompt/static.ts";
+import { findTopic } from "./models/prompt/findTopic.ts";
+import {
+  textToSpeechUseBackend,
+  textToSpeechWeb,
+} from "./models/tts/textToSpeech.ts";
 
 export type contextType = {
   role: "user" | "assistant" | "system";
@@ -52,21 +58,19 @@ function addToContext(
 function App() {
   const [model, setModel] = useState<Live2DModel | null>(null);
   const stage = useRef<HTMLDivElement>(null);
-  const [context, setContext] = useState<contextType[]>([
-    {
-      role: "system",
-      content:
-        "You are a AI for chatting. Your job is to entertain users. let's make some short, funny, and humorous conversation",
-    },
-  ]);
+  const [context, setContext] = useState<contextType[]>(defaultContext);
   const [subtitle, setSubtitle] = useState("");
   const [debugMode, setDebugMode] = useState(false);
   const [showSetting, setShowSetting] = useState(false);
   const [showContext, setShowContext] = useState(false);
   const [chat, setChat] = useState<LLMChatWebLLM | LLMChatOpenAI | null>(null);
+  const TTS = useRef<
+    ((input: string, model?: string) => Promise<string>) | null
+  >(null);
 
   const [backendEndpoint] = useBackendEndpoint();
   const [useBackendLLM] = useUseBackendLLM();
+  const [useBackendTTS] = useUseBackendTTS();
   const [useWebLLM] = useUseWebLLM();
   const [openaiEndpoint] = useOpenaiEndpoint();
   const [openaiApikey] = useOpenaiApikey();
@@ -85,16 +89,19 @@ function App() {
             useBackendLLM ? backendEndpoint + "/llm" : openaiEndpoint
           )
     );
+    TTS.current = useBackendTTS ? textToSpeechUseBackend : textToSpeechWeb;
 
     return () => {
       setChat(null);
+      TTS.current = null;
     };
   }, [
     backendEndpoint,
     openaiApikey,
     openaiEndpoint,
     openaiModelName,
     useBackendLLM,
+    useBackendTTS,
     useWebLLM,
   ]);
 
@@ -135,10 +142,14 @@ function App() {
   }, [subtitle]);
 
   // after user speak
-  async function handleSpeechRecognized(context: contextType[]) {
+  async function handleSpeechRecognized(text: string) {
+    const newContext: contextType[] = [
+      ...context,
+      { role: "user", content: promptHint + text },
+    ];
     userSpeaking = false;
     if (!model || !chat) return;
-    const { stream, interruptGenerate } = await chat.ask(context);
+    const { stream, interruptGenerate } = await chat.ask(newContext);
     reader.stream = stream;
     reader.interruptGenerate = interruptGenerate;
     setContext((context) => [...context, { role: "assistant", content: "" }]);
@@ -154,21 +165,30 @@ function App() {
       currentSentence += llmResponse;
       if (/[.,!?]$/.test(currentSentence)) {
         addToContext(currentSentence, setContext);
-        const data = await textToSpeech(currentSentence, "tts");
+        console.log(TTS);
+        if (!TTS.current) {
+          alert("please wait for init");
+          return;
+        }
+        const data = await TTS.current(currentSentence, "tts");
         await handleSpeak(data, model);
         currentSentence = "";
       }
     }
     if (reader.stream && currentSentence !== "") {
       addToContext(currentSentence, setContext);
-      const data = await textToSpeech(currentSentence, "tts");
+      if (!TTS.current) { // the ref object itself is always truthy; test the function it holds
+        alert("please wait for init");
+        return;
+      }
+      const data = await TTS.current(currentSentence, "tts");
       await handleSpeak(data, model);
     }
     reader.stream = null;
   }
 
   // when user speak break the ai speak
-  async function handleUserSpeaking(_text: string = "") {
+  async function handleUserSpeaking() {
     if (!model) return;
     userSpeaking = true;
     model.stopSpeaking();
@@ -264,14 +284,18 @@ function App() {
         return;
       }
     }
-    if (listening) {
-      stopListening();
-      setSubtitle("-- stop listening... --");
+    if (context.length === defaultContext.length) {
+      handleSpeechRecognized(findTopic());
     } else {
-      listenOnce();
-      // listenContinuously();
-      handleUserSpeaking("");
-      setSubtitle("-- start listening... --");
+      if (listening) {
+        stopListening();
+        setSubtitle("-- stop listening... --");
+      } else {
+        listenOnce();
+        // listenContinuously();
+        handleUserSpeaking();
+        setSubtitle("-- start listening... --");
+      }
     }
   }
 
@@ -294,10 +318,10 @@ function App() {
             ...context,
             { role: "user", content: text },
           ]);
-          handleSpeechRecognized([...context, { role: "user", content: text }]);
+          handleSpeechRecognized(text);
         }}
         onUserSpeaking={(text: string) => {
-          handleUserSpeaking(text);
+          handleUserSpeaking();
         }}
       />
 

diff --git a/src/components/debug.tsx b/src/components/debug.tsx
@@ -1,6 +1,6 @@
 import { useRef } from "react";
 import { Live2DModel, InternalModel } from "pixi-live2d-display-lipsyncpatch";
-import { textToSpeech } from "../models/tts/textToSpeech";
+import { textToSpeechWeb, textToSpeechUseBackend } from "../models/tts/textToSpeech";
 
 export default function Debug({
   model,
@@ -19,11 +19,20 @@ export default function Debug({
             <button
               className="bg-gray-200 rounded-sm"
               onClick={async () => {
-                const data = await textToSpeech("hello word", "tts");
+                const data = await textToSpeechWeb("hello word", "tts");
                 handleSpeak(data, model);
               }}
             >
-              test speaking
+              test speaking(web)
+            </button>
+            <button
+              className="bg-gray-200 rounded-sm"
+              onClick={async () => {
+                const data = await textToSpeechUseBackend("hello word", "tts");
+                handleSpeak(data, model);
+              }}
+            >
+              test speaking(backend)
             </button>
             <button
               className="bg-gray-200 rounded-sm"

diff --git a/src/components/setting.tsx b/src/components/setting.tsx
@@ -4,12 +4,14 @@ import {
   useOpenaiEndpoint,
   useOpenaiModelName,
   useUseBackendLLM,
+  useUseBackendTTS,
   useUseWebLLM,
 } from "../models/appstore";
 
 export default function Setting() {
   const [backendEndpoint, setBackendEndpoint] = useBackendEndpoint();
   const [useBackendLLM, setUseBackendLLM] = useUseBackendLLM();
+  const [useBackendTTS, setUseBackendTTS] = useUseBackendTTS();
   const [useWebLLM, setUseWebLLM] = useUseWebLLM();
   const [openaiEndpoint, setOpenaiEndpoint] = useOpenaiEndpoint();
   const [openaiApikey, setOpenaiApikey] = useOpenaiApikey();
@@ -42,6 +44,18 @@ export default function Setting() {
           />
         </label>
         <br />
+        <label>
+          use backendTTS?:
+          <input
+            className="bg-gray-200"
+            type="checkbox"
+            checked={useBackendTTS}
+            onChange={(e) => {
+              setUseBackendTTS(e.target.checked);
+            }}
+          />
+        </label>
+        <br />
         {!useBackendLLM && (
           <label>
             use webLLM

diff --git a/src/models/appstore.ts b/src/models/appstore.ts
@@ -7,6 +7,8 @@ type AppStore = {
   setBackendEndpoint: (backendEndpoint: string) => void;
   useBackendLLM: boolean;
   setUseBackendLLM: (useBackendLLM: boolean) => void;
+  useBackendTTS: boolean;
+  setUseBackendTTS: (useBackendTTS: boolean) => void;
   useWebLLM: boolean;
   setUseWebLLM: (useWebLLM: boolean) => void;
   openaiEndpoint: string;
@@ -25,6 +27,8 @@ const useAppStore = create<AppStore>()(
         _set({ backendEndpoint }),
       useBackendLLM: false,
       setUseBackendLLM: (useBackendLLM: boolean) => _set({ useBackendLLM }),
+      useBackendTTS: false,
+      setUseBackendTTS: (useBackendTTS: boolean) => _set({ useBackendTTS }),
       useWebLLM: true,
       setUseWebLLM: (useWebLLM: boolean) => _set({ useWebLLM }),
       openaiEndpoint: "http://localhost:11434/v1",
@@ -57,6 +61,13 @@ export const useUseBackendLLM = (): [
   useAppStore(
     useShallow((state) => [state.useBackendLLM, state.setUseBackendLLM])
   );
+export const useUseBackendTTS = (): [
+  boolean,
+  (useBackendTTS: boolean) => void
+] =>
+  useAppStore(
+    useShallow((state) => [state.useBackendTTS, state.setUseBackendTTS])
+  );
 export const useUseWebLLM = (): [boolean, (useWebLLM: boolean) => void] =>
   useAppStore(useShallow((state) => [state.useWebLLM, state.setUseWebLLM]));
 export const useOpenaiEndpoint = (): [

diff --git a/src/models/llm/LLMChatOpenAI.ts b/src/models/llm/LLMChatOpenAI.ts
@@ -19,14 +19,12 @@ export default class LLMChatOpenAI {
     });
   }
 
-  async ask(
-    context: contextType[]
-  ) {
+  async ask(context: contextType[]) {
     const data = {
       model: this.modelName,
       messages: context,
       stream: true,
-      max_completion_tokens: 1024,
+      max_tokens: 200,
       temperature: 0.75,
     };
 
@@ -41,7 +39,9 @@ export default class LLMChatOpenAI {
       },
     });
 
-    const interruptGenerate = ()=>{controller.abort()}
+    const interruptGenerate = () => {
+      controller.abort();
+    };
 
     return { stream, interruptGenerate };
   }

diff --git a/src/models/llm/LLMChatWebLLM.ts b/src/models/llm/LLMChatWebLLM.ts
@@ -54,7 +54,7 @@ export default class LLMChatWebLLM {
       // model: this.modelName,
       messages: context,
       stream: true,
-      max_completion_tokens: 1024,
+      max_tokens: 200,
       temperature: 0.75,
     };