From 33f5ddd801fce17acf61eb299dffe647438b9ea6 Mon Sep 17 00:00:00 2001
From: tegnike <o3nike.teg.14@gmail.com>
Date: Wed, 11 Dec 2024 21:38:42 +0100
Subject: [PATCH 01/14] =?UTF-8?q?=E7=89=B9=E5=AE=9AAI=E3=82=B5=E3=83=BC?=
 =?UTF-8?q?=E3=83=93=E3=82=B9=E3=81=AE=E3=83=87=E3=83=95=E3=82=A9=E3=83=AB?=
 =?UTF-8?q?=E3=83=88=E3=81=AE=E3=83=A2=E3=83=87=E3=83=AB=E5=90=8D=E3=81=8C?=
 =?UTF-8?q?=E5=AD=98=E5=9C=A8=E3=81=97=E3=81=AA=E3=81=84=E3=82=82=E3=81=AE?=
 =?UTF-8?q?=E3=82=92=E6=8C=87=E5=AE=9A=E3=81=97=E3=81=A6=E3=81=84=E3=81=9F?=
 =?UTF-8?q?=E3=81=AE=E3=81=A7=E4=BF=AE=E6=AD=A3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/components/settings/modelProvider.tsx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/components/settings/modelProvider.tsx b/src/components/settings/modelProvider.tsx
index d457a58..c6ef5d7 100644
--- a/src/components/settings/modelProvider.tsx
+++ b/src/components/settings/modelProvider.tsx
@@ -51,9 +51,9 @@ const ModelProvider = () => {
   // オブジェクトを定義して、各AIサービスのデフォルトモデルを保存する
   // ローカルLLMが選択された場合、AIモデルを空文字に設定
   const defaultModels = {
-    openai: 'gpt-4o',
+    openai: 'gpt-4o-2024-11-20',
     anthropic: 'claude-3-5-sonnet-20241022',
-    google: 'gemini-1.5-pro',
+    google: 'gemini-1.5-flash-latest',
     azure: '',
     groq: 'gemma-7b-it',
     cohere: 'command-r-plus',

From 0981ca79b8236b858463d1d8c01d07e30fbc0307 Mon Sep 17 00:00:00 2001
From: tegnike <o3nike.teg.14@gmail.com>
Date: Wed, 11 Dec 2024 21:56:09 +0100
Subject: [PATCH 02/14] =?UTF-8?q?TTS=E3=81=AB=E3=83=86=E3=82=AD=E3=82=B9?=
 =?UTF-8?q?=E3=83=88=E3=82=92=E6=B8=A1=E3=81=99=E5=89=8D=E3=81=AB=E7=B5=B5?=
 =?UTF-8?q?=E6=96=87=E5=AD=97=E5=89=8A=E9=99=A4=E3=81=AA=E3=81=A9=E3=81=AE?=
 =?UTF-8?q?=E5=8A=A0=E5=B7=A5=E5=87=A6=E7=90=86=E8=BF=BD=E5=8A=A0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/features/messages/speakCharacter.ts | 31 +++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/src/features/messages/speakCharacter.ts b/src/features/messages/speakCharacter.ts
index d931df0..9a7fc42 100644
--- a/src/features/messages/speakCharacter.ts
+++ b/src/features/messages/speakCharacter.ts
@@ -25,6 +25,28 @@ const typedEnglishToJapanese = englishToJapanese as EnglishToJapanese
 
 const speakQueue = new SpeakQueue()
 
+function preprocessMessage(
+  message: string,
+  settings: ReturnType<typeof settingsStore.getState>
+): string | null {
+  // Trim surrounding whitespace. NOTE(review): this runs before the emoji removal below.
+  let processed = message.trim()
+
+  // Convert English words to Japanese readings (only when enabled and the language is 'ja')
+  if (settings.changeEnglishToJapanese && settings.selectLanguage === 'ja') {
+    processed = convertEnglishToJapaneseReading(processed)
+  }
+
+  // Strip emoji. NOTE(review): the first range already covers the 1F600/1F680/1F900 ranges.
+  processed = processed.replace(
+    /[\u{1F300}-\u{1F9FF}]|[\u{1F600}-\u{1F64F}]|[\u{1F680}-\u{1F6FF}]|[\u{2600}-\u{26FF}]|[\u{2700}-\u{27BF}]|[\u{1F900}-\u{1F9FF}]|[\u{1F1E0}-\u{1F1FF}]/gu,
+    ''
+  )
+
+  // Return null when nothing is left; a whitespace-only remainder is still returned as-is
+  return processed || null
+}
+
 const createSpeakCharacter = () => {
   let lastTime = 0
   let prevFetchPromise: Promise<unknown> = Promise.resolve()
@@ -33,8 +55,13 @@ const createSpeakCharacter = () => {
     const ss = settingsStore.getState()
     onStart?.()
 
-    if (ss.changeEnglishToJapanese && ss.selectLanguage === 'ja') {
-      talk.message = convertEnglishToJapaneseReading(talk.message)
+    const processedMessage = preprocessMessage(talk.message, ss)
+    if (!processedMessage && !talk.buffer) {
+      return
+    }
+
+    if (processedMessage) {
+      talk.message = processedMessage
     }
 
     let isNeedDecode = true

From 98113fa6c08ade397a2c4765789388b1fb3060e7 Mon Sep 17 00:00:00 2001
From: tegnike <o3nike.teg.14@gmail.com>
Date: Wed, 11 Dec 2024 22:09:30 +0100
Subject: [PATCH 03/14] =?UTF-8?q?=E9=9F=B3=E5=A3=B0=E3=82=92=E4=B8=A6?=
 =?UTF-8?q?=E5=88=97=E5=87=A6=E7=90=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/features/messages/speakCharacter.ts | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/src/features/messages/speakCharacter.ts b/src/features/messages/speakCharacter.ts
index 9a7fc42..a57881e 100644
--- a/src/features/messages/speakCharacter.ts
+++ b/src/features/messages/speakCharacter.ts
@@ -49,7 +49,6 @@ function preprocessMessage(
 
 const createSpeakCharacter = () => {
   let lastTime = 0
-  let prevFetchPromise: Promise<unknown> = Promise.resolve()
 
   return (talk: Talk, onStart?: () => void, onComplete?: () => void) => {
     const ss = settingsStore.getState()
@@ -66,12 +65,8 @@ const createSpeakCharacter = () => {
 
     let isNeedDecode = true
 
-    const fetchPromise = prevFetchPromise.then(async () => {
-      const now = Date.now()
-      if (now - lastTime < 1000) {
-        await wait(1000 - (now - lastTime))
-      }
-
+    // API呼び出しを即時実行
+    const fetchPromise = (async () => {
       let buffer
       try {
         if (talk.message == '' && talk.buffer) {
@@ -161,13 +156,10 @@ const createSpeakCharacter = () => {
         handleTTSError(error, ss.selectVoice)
         return null
       }
-      lastTime = Date.now()
       return buffer
-    })
-
-    prevFetchPromise = fetchPromise
+    })()
 
-    // キューを使用した処理に変更
+    // 音声バッファの取得後、再生キューに追加
     fetchPromise.then((audioBuffer) => {
       if (!audioBuffer) return
 

From a775750f5d8ca9ee941814a8ac3932ca764f384c Mon Sep 17 00:00:00 2001
From: tegnike <o3nike.teg.14@gmail.com>
Date: Wed, 11 Dec 2024 23:04:36 +0100
Subject: [PATCH 04/14] =?UTF-8?q?README=E6=9B=B4=E6=96=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md         | 91 ++++++++-------------------------------------
 docs/README_en.md | 94 ++++++++---------------------------------------
 docs/README_ko.md | 15 ++++++++
 docs/README_zh.md | 15 ++++++++
 4 files changed, 61 insertions(+), 154 deletions(-)

diff --git a/README.md b/README.md
index 63d28e7..0169435 100644
--- a/README.md
+++ b/README.md
@@ -39,6 +39,20 @@
 
 [![今日からあなたもAITuberデベロッパー｜ニケちゃん](https://github.com/tegnike/aituber-kit/assets/35606144/a958f505-72f9-4665-ab6c-b57b692bb166)](https://note.com/nike_cha_n/n/ne98acb25e00f)
 
+## ⚠️ セキュリティに関する重要な注意事項
+
+このリポジトリは、個人利用やローカル環境での開発はもちろん、適切なセキュリティ対策を施した上での商用利用も想定しています。ただし、Web環境にデプロイする際は以下の点にご注意ください：
+
+- **APIキーの取り扱い**: バックエンドサーバーを経由してAIサービス（OpenAI, Anthropic等）やTTSサービスのAPIを呼び出す仕様となっているため、APIキーの適切な管理が必要です。
+
+### 本番環境での利用について
+
+本番環境で利用する場合は、以下のいずれかの対応を推奨します：
+
+1. **バックエンドサーバーの実装**: APIキーの管理をサーバーサイドで行い、クライアントからの直接的なAPIアクセスを避ける
+2. **利用者への適切な説明**: 各利用者が自身のAPIキーを使用する場合は、セキュリティ上の注意点について説明する
+3. **アクセス制限の実装**: 必要に応じて、適切な認証・認可の仕組みを実装する
+
 ## 開発環境
 
 このプロジェクトは以下の環境で開発されています：
@@ -84,7 +98,7 @@ npm run dev
 
 ### 使用方法
 
-1. 設定画面で各種LLMのAPIキーを入力します。
+1. 設定画面で選択したLLMのAPIキーを入力します。
    - OpenAI
    - Anthropic
    - Google Gemini
@@ -108,6 +122,7 @@ npm run dev
    - ElevenLabs: 様々な言語の選択が可能です。APIキーの入力が必要です。
    - OpenAI: APIキーの入力が必要です。
    - Azure OpenAI: APIキーの入力が必要です。
+   - にじボイス: APIキーの入力が必要です。
 5. 入力フォームからキャラクターと会話を開始します。マイク入力も可能です。
 
 ## AITuber配信
@@ -302,77 +317,3 @@ npm run dev
 
 - [ロゴの利用規約](./docs/logo_licence.md)
 - [VRMモデルの利用規約](./docs/vrm_licence.md)
-
-## コントリビューター用TIPS
-
-### 新しい言語の追加方法
-
-新しい言語をプロジェクトに追加するには、以下の手順に従ってください。
-
-1. **言語ファイルの追加**:
-
-   - `locales` ディレクトリに新しい言語のディレクトリを作成し、その中に `translation.json` ファイルを作成します。
-   - 例: `locales/fr/translation.json` (フランス語の場合)
-
-2. **翻訳の追加**:
-
-   - `translation.json` ファイルに、既存の言語ファイルを参考にして翻訳を追加します。
-
-3. **言語設定の更新**:
-
-   - `src/lib/i18n.js` ファイルを開き、`resources` オブジェクトに新しい言語を追加します。
-
-   ```javascript:src/lib/i18n.js
-   resources: {
-     ...,
-     fr: {  // 新しい言語コード
-       translation: require("../../locales/fr/translation.json"),
-     },
-   },
-   ```
-
-4. **言語選択オプションの追加**:
-
-   - ユーザーが言語を選択できるように、UIの適切な部分（例えば設定画面の言語選択ドロップダウン）に新しい言語オプションを追加します。
-
-   ```typescript:src/components/settings.tsx
-   <select>
-     ...,
-     <option value="FR">フランス語 - French</option>
-   </select>
-   ```
-
-5. **テスト**:
-   - 新しい言語でアプリケーションが正しく表示されるかテストします。
-
-これで新しい言語のサポートがプロジェクトに追加されます。
-
-#### 音声言語コードの追加
-
-- 音声言語コードの対応も追加する必要があります。
-- `Introduction` コンポーネント内の `getVoiceLanguageCode` 関数に新しい言語コードを追加します。
-
-```typescript:nike-ChatVRM/src/components/introduction.tsx
-const getVoiceLanguageCode = (selectLanguage: string) => {
-  switch (selectLanguage) {
-    case 'JP':
-      return 'ja-JP';
-    case 'EN':
-      return 'en-US';
-    case 'ZH':
-      return 'zh-TW';
-    case 'zh-TW':
-      return 'zh-TW';
-    case 'KO':
-      return 'ko-KR';
-    case 'FR':
-      return 'fr-FR';
-    default:
-      return 'ja-JP';
-  }
-}
-```
-
-#### READMEの追加
-
-- 新しい言語のREADME (`README_fr.md`), ロゴ利用規約 (`logo_licence_fr.md`), VRMモデル利用規約 (`vrm_licence_fr.md`) を `docs` ディレクトリに追加してください。
diff --git a/docs/README_en.md b/docs/README_en.md
index 39b8c7b..a605978 100644
--- a/docs/README_en.md
+++ b/docs/README_en.md
@@ -40,6 +40,20 @@ I've written a detailed usage guide in the article below:
 
 [![You are AITuber Developer from Today | Nike-chan](https://github.com/tegnike/aituber-kit/assets/35606144/a958f505-72f9-4665-ab6c-b57b692bb166)](https://note.com/nike_cha_n/n/ne98acb25e00f)
 
+## ⚠️ Important Security Notice
+
+This repository is designed for personal use, local development, and commercial use with appropriate security measures. However, please note the following points when deploying to a web environment:
+
+- **API Key Handling**: This application calls the APIs of AI services (OpenAI, Anthropic, etc.) and TTS services through a backend server, so the API keys must be managed properly.
+
+### Regarding Production Environment Usage
+
+When using this project in a production environment, we recommend one of the following approaches:
+
+1. **Backend Server Implementation**: Manage API keys on the server side and avoid direct API access from clients
+2. **Proper User Instructions**: When users use their own API keys, explain the security considerations
+3. **Access Control Implementation**: Implement appropriate authentication and authorization mechanisms as needed
+
 ## Development Environment
 
 This project is developed in the following environment:
@@ -109,6 +123,7 @@ npm run dev
    - ElevenLabs: Various language selection is possible. Please enter the API key.
    - OpenAI: API key is required.
    - Azure OpenAI: API key is required.
+   - Nijivoice: API key is required.
 5. Start conversing with the character from the input form. Microphone input is also possible.
 
 ## AITuber Streaming
@@ -303,82 +318,3 @@ From version v2.0.0, this project adopts a **custom license**.
 
 - [Logo Usage Agreement](./logo_licence_en.md)
 - [VRM Model Usage Agreement](./vrm_licence_en.md)
-
-## Tips for Contributors
-
-### How to Add a New Language
-
-To add a new language to the project, follow these steps:
-
-1. **Add Language File**:
-
-   - Create a new language directory in the `locales` directory and create a `translation.json` file inside it.
-   - Example: `locales/fr/translation.json` (for French)
-
-2. **Add Translations**:
-
-   - Add translations to the `translation.json` file, referring to existing language files.
-
-3. **Update Language Settings**:
-
-   - Open the `src/lib/i18n.js` file and add the new language to the `resources` object.
-
-   ```javascript:src/lib/i18n.js
-   resources: {
-     ...,
-     fr: {  // New language code
-       translation: require("../../locales/fr/translation.json"),
-     },
-   },
-   ```
-
-4. **Add Language Selection Option**:
-
-   - Add a new language option to the appropriate part of the UI (e.g., language selection dropdown in the settings screen) so users can select the language.
-
-   ```typescript:src/components/settings.tsx
-   <select>
-     ...,
-     <option value="FR">French - Français</option>
-   </select>
-   ```
-
-5. **Test**:
-   - Test if the application displays correctly in the new language.
-
-This will add support for the new language to the project.
-
-#### Adding Voice Language Code
-
-- You also need to add support for the voice language code.
-- Add the new language code to the `getVoiceLanguageCode` function in the `Introduction` component.
-
-```typescript:nike-ChatVRM/src/components/introduction.tsx
-const getVoiceLanguageCode = (selectLanguage: string) => {
-  switch (selectLanguage) {
-    case 'JP':
-      return 'ja-JP';
-    case 'EN':
-      return 'en-US';
-    case 'ZH':
-      return 'zh-TW';
-    case 'zh-TW':
-      return 'zh-TW';
-    case 'KO':
-      return 'ko-KR';
-    case 'FR':
-      return 'fr-FR';
-    default:
-      return 'ja-JP';
-  }
-}
-```
-
-#### Adding README
-
-- Add a new language README (`README_fr.md`), logo usage terms (`logo_licence_fr.md`), and VRM model usage terms (`vrm_licence_fr.md`) to the `docs` directory.
-
-```
-
-
-```
diff --git a/docs/README_ko.md b/docs/README_ko.md
index d1a5741..c4d596d 100644
--- a/docs/README_ko.md
+++ b/docs/README_ko.md
@@ -40,6 +40,20 @@
 
 [![오늘부터 당신도 AITuber 개발자｜니케짱](https://github.com/tegnike/aituber-kit/assets/35606144/a958f505-72f9-4665-ab6c-b57b692bb166)](https://note.com/nike_cha_n/n/ne98acb25e00f)
 
+## ⚠️ 보안에 관한 중요 주의사항
+
+이 저장소는 개인 사용과 로컬 환경에서의 개발은 물론, 적절한 보안 대책을 마련한 상태에서의 상업적 사용도 고려하고 있습니다. 단, 웹 환경에 배포할 때는 다음 사항에 주의해 주시기 바랍니다:
+
+- **API 키 취급**: 백엔드 서버를 통해 AI 서비스(OpenAI, Anthropic 등)와 TTS 서비스의 API를 호출하는 사양이므로, API 키의 적절한 관리가 필요합니다.
+
+### 프로덕션 환경에서의 사용에 대하여
+
+프로덕션 환경에서 사용할 경우, 다음 중 하나의 대응을 권장합니다:
+
+1. **백엔드 서버 구현**: API 키 관리를 서버 사이드에서 수행하여 클라이언트로부터의 직접적인 API 접근을 피하기
+2. **사용자에 대한 적절한 설명**: 각 사용자가 자신의 API 키를 사용하는 경우, 보안상의 주의사항에 대해 설명하기
+3. **접근 제한 구현**: 필요에 따라 적절한 인증・인가 메커니즘을 구현하기
+
 ## 개발 환경
 
 이 프로젝트는 다음과 같은 환경에서 개발되었습니다:
@@ -109,6 +123,7 @@ npm run dev
    - ElevenLabs: 다양한 언어 선택이 가능합니다. API 키를 입력하세요.
    - OpenAI: API 키가 필요합니다.
    - Azure OpenAI: API 키가 필요합니다.
+   - Nijivoice: API 키가 필요합니다.
 5. 입력 양식에서 캐릭터와 대화를 시작합니다. 마이크 입력도 가능합니다.
 
 ## AITuber 스트리밍
diff --git a/docs/README_zh.md b/docs/README_zh.md
index 0347c5b..7d8fadb 100644
--- a/docs/README_zh.md
+++ b/docs/README_zh.md
@@ -39,6 +39,20 @@
 
 [![今天成為AITuber開發者 | Nike-chan](https://github.com/tegnike/aituber-kit/assets/35606144/a958f505-72f9-4665-ab6c-b57b692bb166)](https://note.com/nike_cha_n/n/ne98acb25e00f)
 
+## ⚠️ 安全性相關重要注意事項
+
+本存儲庫不僅考慮個人使用和本地環境開發，還考慮在採取適當安全措施的情況下進行商業使用。但是，在部署到Web環境時，請注意以下幾點：
+
+- **API密鑰的處理**: 由於系統設計需要通過後端服務器調用AI服務（如OpenAI、Anthropic等）和TTS服務的API，因此需要適當管理API密鑰。
+
+### 關於生產環境的使用
+
+在生產環境中使用時，建議採取以下其中一種方案：
+
+1. **實現後端服務器**: 在服務器端管理API密鑰，避免客戶端直接訪問API
+2. **向用戶提供適當說明**: 當用戶使用自己的API密鑰時，需要說明安全注意事項
+3. **實現訪問限制**: 根據需要實現適當的身份驗證和授權機制
+
 ## 開發環境
 
 此項目在以下環境中開發：
@@ -108,6 +122,7 @@ npm run dev
    - ElevenLabs：支持多種語言選擇。需要輸入API密鑰。
    - OpenAI：需要API密鑰。
    - Azure OpenAI：需要API密鑰。
+   - Nijivoice：需要API密鑰。
 5. 從輸入表單開始與角色對話。也可以使用麥克風輸入。
 
 ## AITuber直播

From 1500e7499cde511351560a67c3a51b6b570e8660 Mon Sep 17 00:00:00 2001
From: tegnike <o3nike.teg.14@gmail.com>
Date: Thu, 12 Dec 2024 00:09:13 +0100
Subject: [PATCH 05/14] =?UTF-8?q?Issue=E7=94=A8=E3=82=A2=E3=83=8A=E3=83=A9?=
 =?UTF-8?q?=E3=82=A4=E3=82=B6=E3=82=92=E4=B8=80=E6=97=A6=E9=9D=9E=E6=B4=BB?=
 =?UTF-8?q?=E6=80=A7?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/workflows/issue-analyzer.yml | 78 ++++++++++++++--------------
 1 file changed, 39 insertions(+), 39 deletions(-)

diff --git a/.github/workflows/issue-analyzer.yml b/.github/workflows/issue-analyzer.yml
index 1712c0c..a41371d 100644
--- a/.github/workflows/issue-analyzer.yml
+++ b/.github/workflows/issue-analyzer.yml
@@ -1,44 +1,44 @@
-name: Issue Analyzer
+# name: Issue Analyzer
 
-on:
-  issues:
-    types: [opened]
-  workflow_dispatch:
-    inputs:
-      issue_number:
-        description: 'Issue Number'
-        required: true
-      issue_title:
-        description: 'Issue Title'
-        required: true
-      issue_body:
-        description: 'Issue Body'
-        required: true
+# on:
+#   issues:
+#     types: [opened]
+#   workflow_dispatch:
+#     inputs:
+#       issue_number:
+#         description: 'Issue Number'
+#         required: true
+#       issue_title:
+#         description: 'Issue Title'
+#         required: true
+#       issue_body:
+#         description: 'Issue Body'
+#         required: true
 
-jobs:
-  analyze-issue:
-    runs-on: ubuntu-latest
-    permissions:
-      issues: write
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v2
+# jobs:
+#   analyze-issue:
+#     runs-on: ubuntu-latest
+#     permissions:
+#       issues: write
+#     steps:
+#       - name: Checkout repository
+#         uses: actions/checkout@v2
 
-      - name: Set up Python
-        uses: actions/setup-python@v2
-        with:
-          python-version: '3.x'
+#       - name: Set up Python
+#         uses: actions/setup-python@v2
+#         with:
+#           python-version: '3.x'
 
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install requests anthropic
+#       - name: Install dependencies
+#         run: |
+#           python -m pip install --upgrade pip
+#           pip install requests anthropic
 
-      - name: Analyze issue
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
-          ISSUE_NUMBER: ${{ github.event.inputs.issue_number || github.event.issue.number }}
-          ISSUE_TITLE: ${{ github.event.inputs.issue_title || github.event.issue.title }}
-          ISSUE_BODY: ${{ github.event.inputs.issue_body || github.event.issue.body }}
-        run: python scripts/analyze_issue.py
+#       - name: Analyze issue
+#         env:
+#           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+#           ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+#           ISSUE_NUMBER: ${{ github.event.inputs.issue_number || github.event.issue.number }}
+#           ISSUE_TITLE: ${{ github.event.inputs.issue_title || github.event.issue.title }}
+#           ISSUE_BODY: ${{ github.event.inputs.issue_body || github.event.issue.body }}
+#         run: python scripts/analyze_issue.py

From 5ffe9e611c9a26e989dae8b62188337a3d5037fa Mon Sep 17 00:00:00 2001
From: tegnike <o3nike.teg.14@gmail.com>
Date: Thu, 12 Dec 2024 00:20:19 +0100
Subject: [PATCH 06/14] =?UTF-8?q?gemini=E3=81=AE=E6=96=B0=E3=81=97?=
 =?UTF-8?q?=E3=81=84=E3=83=A2=E3=83=87=E3=83=AB=E8=BF=BD=E5=8A=A0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/components/settings/modelProvider.tsx | 13 ++++++++-----
 src/components/settings/slideConvert.tsx  | 13 ++++++++-----
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/src/components/settings/modelProvider.tsx b/src/components/settings/modelProvider.tsx
index c6ef5d7..cdddb09 100644
--- a/src/components/settings/modelProvider.tsx
+++ b/src/components/settings/modelProvider.tsx
@@ -408,6 +408,12 @@ const ModelProvider = () => {
                     })
                   }
                 >
+                  <option value="gemini-1.5-flash-latest">
+                    gemini-1.5-flash-latest
+                  </option>
+                  <option value="gemini-1.5-pro-latest">
+                    gemini-1.5-pro-latest
+                  </option>
                   <option value="gemini-1.5-flash-exp-0827">
                     gemini-1.5-flash-exp-0827
                   </option>
@@ -417,11 +423,8 @@ const ModelProvider = () => {
                   <option value="gemini-1.5-flash-8b-exp-0827">
                     gemini-1.5-flash-8b-exp-0827
                   </option>
-                  <option value="gemini-1.5-pro-latest">
-                    gemini-1.5-pro-latest
-                  </option>
-                  <option value="gemini-1.5-flash-latest">
-                    gemini-1.5-flash-latest
+                  <option value="gemini-2.0-flash-exp">
+                    gemini-2.0-flash-exp
                   </option>
                 </select>
               </div>
diff --git a/src/components/settings/slideConvert.tsx b/src/components/settings/slideConvert.tsx
index b218ae4..727bc5a 100644
--- a/src/components/settings/slideConvert.tsx
+++ b/src/components/settings/slideConvert.tsx
@@ -157,6 +157,12 @@ const SlideConvert: React.FC<SlideConvertProps> = ({ onFolderUpdate }) => {
           )}
           {aiService === 'google' && (
             <>
+              <option value="gemini-1.5-flash-latest">
+                gemini-1.5-flash-latest
+              </option>
+              <option value="gemini-1.5-pro-latest">
+                gemini-1.5-pro-latest
+              </option>
               <option value="gemini-1.5-flash-exp-0827">
                 gemini-1.5-flash-exp-0827
               </option>
@@ -166,11 +172,8 @@ const SlideConvert: React.FC<SlideConvertProps> = ({ onFolderUpdate }) => {
               <option value="gemini-1.5-flash-8b-exp-0827">
                 gemini-1.5-flash-8b-exp-0827
               </option>
-              <option value="gemini-1.5-pro-latest">
-                gemini-1.5-pro-latest
-              </option>
-              <option value="gemini-1.5-flash-latest">
-                gemini-1.5-flash-latest
+              <option value="gemini-2.0-flash-exp">
+                gemini-2.0-flash-exp
               </option>
             </>
           )}

From 9360b87bb1fd9679b667cd3bbe4055a57b42467a Mon Sep 17 00:00:00 2001
From: tegnike <o3nike.teg.14@gmail.com>
Date: Thu, 12 Dec 2024 00:22:29 +0100
Subject: [PATCH 07/14] lint fix

---
 src/components/settings/slideConvert.tsx | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/components/settings/slideConvert.tsx b/src/components/settings/slideConvert.tsx
index 727bc5a..239e87b 100644
--- a/src/components/settings/slideConvert.tsx
+++ b/src/components/settings/slideConvert.tsx
@@ -172,9 +172,7 @@ const SlideConvert: React.FC<SlideConvertProps> = ({ onFolderUpdate }) => {
               <option value="gemini-1.5-flash-8b-exp-0827">
                 gemini-1.5-flash-8b-exp-0827
               </option>
-              <option value="gemini-2.0-flash-exp">
-                gemini-2.0-flash-exp
-              </option>
+              <option value="gemini-2.0-flash-exp">gemini-2.0-flash-exp</option>
             </>
           )}
         </select>

From 72f3c5dae78ec05d435f105e1713730ff234f573 Mon Sep 17 00:00:00 2001
From: tegnike <o3nike.teg.14@gmail.com>
Date: Thu, 12 Dec 2024 18:45:04 +0100
Subject: [PATCH 08/14] =?UTF-8?q?TTS=E3=81=AE=E9=A0=86=E7=95=AA=E3=81=8C?=
 =?UTF-8?q?=E6=8B=85=E4=BF=9D=E3=81=95=E3=82=8C=E3=81=A6=E3=81=84=E3=81=AA?=
 =?UTF-8?q?=E3=81=8B=E3=81=A3=E3=81=9F=E3=81=AE=E3=81=A7=E4=BF=AE=E6=AD=A3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/features/messages/speakCharacter.ts | 191 ++++++++++++------------
 src/features/messages/speakQueue.ts     |  22 ++-
 2 files changed, 108 insertions(+), 105 deletions(-)

diff --git a/src/features/messages/speakCharacter.ts b/src/features/messages/speakCharacter.ts
index a57881e..78c3d8a 100644
--- a/src/features/messages/speakCharacter.ts
+++ b/src/features/messages/speakCharacter.ts
@@ -1,7 +1,6 @@
 import homeStore from '@/features/stores/home'
 import settingsStore from '@/features/stores/settings'
 import englishToJapanese from '@/utils/englishToJapanese.json'
-import { wait } from '@/utils/wait'
 import { Talk } from './messages'
 import { synthesizeStyleBertVITS2Api } from './synthesizeStyleBertVITS2'
 import { synthesizeVoiceKoeiromapApi } from './synthesizeVoiceKoeiromap'
@@ -48,8 +47,6 @@ function preprocessMessage(
 }
 
 const createSpeakCharacter = () => {
-  let lastTime = 0
-
   return (talk: Talk, onStart?: () => void, onComplete?: () => void) => {
     const ss = settingsStore.getState()
     onStart?.()
@@ -65,110 +62,110 @@ const createSpeakCharacter = () => {
 
     let isNeedDecode = true
 
-    // API呼び出しを即時実行
-    const fetchPromise = (async () => {
-      let buffer
+    // audioBufferのフェッチを開始
+    const audioBufferPromise: Promise<ArrayBuffer | null> = (async () => {
       try {
-        if (talk.message == '' && talk.buffer) {
-          buffer = talk.buffer
+        if (talk.message === '' && talk.buffer) {
           isNeedDecode = false
+          return talk.buffer
         } else if (ss.audioMode) {
-          buffer = null
-        } else if (ss.selectVoice == 'koeiromap') {
-          buffer = await synthesizeVoiceKoeiromapApi(
-            talk,
-            ss.koeiromapKey,
-            ss.koeiroParam
-          )
-        } else if (ss.selectVoice == 'voicevox') {
-          buffer = await synthesizeVoiceVoicevoxApi(
-            talk,
-            ss.voicevoxSpeaker,
-            ss.voicevoxSpeed,
-            ss.voicevoxPitch,
-            ss.voicevoxIntonation
-          )
-        } else if (ss.selectVoice == 'google') {
-          buffer = await synthesizeVoiceGoogleApi(
-            talk,
-            ss.googleTtsType,
-            ss.selectLanguage
-          )
-        } else if (ss.selectVoice == 'stylebertvits2') {
-          buffer = await synthesizeStyleBertVITS2Api(
-            talk,
-            ss.stylebertvits2ServerUrl,
-            ss.stylebertvits2ApiKey,
-            ss.stylebertvits2ModelId,
-            ss.stylebertvits2Style,
-            ss.stylebertvits2SdpRatio,
-            ss.stylebertvits2Length,
-            ss.selectLanguage
-          )
-        } else if (ss.selectVoice == 'aivis_speech') {
-          buffer = await synthesizeVoiceAivisSpeechApi(
-            talk,
-            ss.aivisSpeechSpeaker,
-            ss.aivisSpeechSpeed,
-            ss.aivisSpeechPitch,
-            ss.aivisSpeechIntonation
-          )
-        } else if (ss.selectVoice == 'gsvitts') {
-          buffer = await synthesizeVoiceGSVIApi(
-            talk,
-            ss.gsviTtsServerUrl,
-            ss.gsviTtsModelId,
-            ss.gsviTtsBatchSize,
-            ss.gsviTtsSpeechRate
-          )
-        } else if (ss.selectVoice == 'elevenlabs') {
-          buffer = await synthesizeVoiceElevenlabsApi(
-            talk,
-            ss.elevenlabsApiKey,
-            ss.elevenlabsVoiceId,
-            ss.selectLanguage
-          )
-        } else if (ss.selectVoice == 'openai') {
-          buffer = await synthesizeVoiceOpenAIApi(
-            talk,
-            ss.openaiTTSKey || ss.openaiKey,
-            ss.openaiTTSVoice,
-            ss.openaiTTSModel,
-            ss.openaiTTSSpeed
-          )
-        } else if (ss.selectVoice == 'azure') {
-          buffer = await synthesizeVoiceAzureOpenAIApi(
-            talk,
-            ss.azureTTSKey || ss.azureKey,
-            ss.azureTTSEndpoint || ss.azureEndpoint,
-            ss.openaiTTSVoice,
-            ss.openaiTTSSpeed
-          )
-        } else if (ss.selectVoice == 'nijivoice') {
-          buffer = await synthesizeVoiceNijivoiceApi(
-            talk,
-            ss.nijivoiceApiKey,
-            ss.nijivoiceActorId,
-            ss.nijivoiceSpeed
-          )
+          return null
+        }
+
+        // 選択されたボイスに応じたTTS APIを呼び出す
+        switch (ss.selectVoice) {
+          case 'koeiromap':
+            return await synthesizeVoiceKoeiromapApi(
+              talk,
+              ss.koeiromapKey,
+              ss.koeiroParam
+            )
+          case 'voicevox':
+            return await synthesizeVoiceVoicevoxApi(
+              talk,
+              ss.voicevoxSpeaker,
+              ss.voicevoxSpeed,
+              ss.voicevoxPitch,
+              ss.voicevoxIntonation
+            )
+          case 'google':
+            return await synthesizeVoiceGoogleApi(
+              talk,
+              ss.googleTtsType,
+              ss.selectLanguage
+            )
+          case 'stylebertvits2':
+            return await synthesizeStyleBertVITS2Api(
+              talk,
+              ss.stylebertvits2ServerUrl,
+              ss.stylebertvits2ApiKey,
+              ss.stylebertvits2ModelId,
+              ss.stylebertvits2Style,
+              ss.stylebertvits2SdpRatio,
+              ss.stylebertvits2Length,
+              ss.selectLanguage
+            )
+          case 'aivis_speech':
+            return await synthesizeVoiceAivisSpeechApi(
+              talk,
+              ss.aivisSpeechSpeaker,
+              ss.aivisSpeechSpeed,
+              ss.aivisSpeechPitch,
+              ss.aivisSpeechIntonation
+            )
+          case 'gsvitts':
+            return await synthesizeVoiceGSVIApi(
+              talk,
+              ss.gsviTtsServerUrl,
+              ss.gsviTtsModelId,
+              ss.gsviTtsBatchSize,
+              ss.gsviTtsSpeechRate
+            )
+          case 'elevenlabs':
+            return await synthesizeVoiceElevenlabsApi(
+              talk,
+              ss.elevenlabsApiKey,
+              ss.elevenlabsVoiceId,
+              ss.selectLanguage
+            )
+          case 'openai':
+            return await synthesizeVoiceOpenAIApi(
+              talk,
+              ss.openaiTTSKey || ss.openaiKey,
+              ss.openaiTTSVoice,
+              ss.openaiTTSModel,
+              ss.openaiTTSSpeed
+            )
+          case 'azure':
+            return await synthesizeVoiceAzureOpenAIApi(
+              talk,
+              ss.azureTTSKey || ss.azureKey,
+              ss.azureTTSEndpoint || ss.azureEndpoint,
+              ss.openaiTTSVoice,
+              ss.openaiTTSSpeed
+            )
+          case 'nijivoice':
+            return await synthesizeVoiceNijivoiceApi(
+              talk,
+              ss.nijivoiceApiKey,
+              ss.nijivoiceActorId,
+              ss.nijivoiceSpeed
+            )
+          default:
+            throw new Error('Unsupported voice type')
         }
       } catch (error) {
         handleTTSError(error, ss.selectVoice)
         return null
       }
-      return buffer
     })()
 
-    // 音声バッファの取得後、再生キューに追加
-    fetchPromise.then((audioBuffer) => {
-      if (!audioBuffer) return
-
-      speakQueue.addTask({
-        audioBuffer,
-        talk,
-        isNeedDecode,
-        onComplete,
-      })
+    // タスクをSpeakQueueに追加
+    speakQueue.addTask({
+      audioBufferPromise,
+      talk,
+      isNeedDecode,
+      onComplete,
     })
   }
 }
diff --git a/src/features/messages/speakQueue.ts b/src/features/messages/speakQueue.ts
index fbc18b9..a7aa26b 100644
--- a/src/features/messages/speakQueue.ts
+++ b/src/features/messages/speakQueue.ts
@@ -1,8 +1,8 @@
 import { Talk } from './messages'
 import homeStore from '@/features/stores/home'
 
-type SpeakTask = {
-  audioBuffer: ArrayBuffer
+type SpeakTaskWithPromise = {
+  audioBufferPromise: Promise<ArrayBuffer | null>
   talk: Talk
   isNeedDecode: boolean
   onComplete?: () => void
@@ -10,12 +10,12 @@ type SpeakTask = {
 
 export class SpeakQueue {
   private static readonly QUEUE_CHECK_DELAY = 1500
-  private queue: SpeakTask[] = []
+  private queue: SpeakTaskWithPromise[] = []
   private isProcessing = false
 
-  async addTask(task: SpeakTask) {
+  addTask(task: SpeakTaskWithPromise) {
     this.queue.push(task)
-    await this.processQueue()
+    this.processQueue()
   }
 
   private async processQueue() {
@@ -27,9 +27,15 @@ export class SpeakQueue {
       const task = this.queue.shift()
       if (task) {
         try {
-          const { audioBuffer, talk, isNeedDecode, onComplete } = task
-          await hs.viewer.model?.speak(audioBuffer, talk, isNeedDecode)
-          onComplete?.()
+          const audioBuffer = await task.audioBufferPromise
+          if (audioBuffer) {
+            await hs.viewer.model?.speak(
+              audioBuffer,
+              task.talk,
+              task.isNeedDecode
+            )
+            task.onComplete?.()
+          }
         } catch (error) {
           console.error(
             'An error occurred while processing the speech synthesis task:',

From fdd9b6e48ced4bc0b96a87bef0fa566796faca75 Mon Sep 17 00:00:00 2001
From: tegnike <o3nike.teg.14@gmail.com>
Date: Thu, 12 Dec 2024 20:29:52 +0100
Subject: [PATCH 09/14] =?UTF-8?q?=E3=83=AA=E3=82=A2=E3=83=AB=E3=82=BF?=
 =?UTF-8?q?=E3=82=A4=E3=83=A0API=E3=83=A2=E3=83=BC=E3=83=89=E6=99=82?=
 =?UTF-8?q?=E3=81=AB=E9=9F=B3=E5=A3=B0=E8=A8=AD=E5=AE=9A=E3=82=92=E9=9D=9E?=
 =?UTF-8?q?=E8=A1=A8=E7=A4=BA=E3=81=AB?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 locales/en/translation.json               |  3 ++-
 locales/ja/translation.json               |  3 ++-
 locales/ko/translation.json               |  3 ++-
 locales/zh/translation.json               |  3 ++-
 src/components/settings/modelProvider.tsx |  2 +-
 src/components/settings/voice.tsx         | 12 ++++++++++++
 6 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/locales/en/translation.json b/locales/en/translation.json
index 6e118c6..54c65a2 100644
--- a/locales/en/translation.json
+++ b/locales/en/translation.json
@@ -197,5 +197,6 @@
     "useCurrentSystemPrompt": "Use AITuberKit system prompt",
     "userInputTitle": "Send user input",
     "userInputDescription": "The message sent is processed the same as when input from the AITuberKit input form. If multiple messages are sent, they are processed in order. The AI model and voice model are the ones selected in the AITuberKit settings. The system prompt and conversation history are the values set in AITuberKit."
-  }
+  },
+  "CannotUseVoice": "Realtime API mode or Audio mode is enabled, so voice settings are not required."
 }
diff --git a/locales/ja/translation.json b/locales/ja/translation.json
index 958c9f1..71e61ef 100644
--- a/locales/ja/translation.json
+++ b/locales/ja/translation.json
@@ -198,5 +198,6 @@
     "useCurrentSystemPrompt": "AITuberKitのシステムプロンプトを利用する",
     "userInputTitle": "ユーザー入力を送信する",
     "userInputDescription": "送信したメッセージはAITuberKitの入力フォームから入力された場合と同じ処理がされます。複数送信した場合は順番に処理されます。\nAIモデルおよび音声モデルはAITuberKitの設定で選択したものが使用されます。\nシステムプロンプトや会話履歴はAITuberKitの値が使用されます。"
-  }
+  },
+  "CannotUseVoice": "リアルタイムAPIモード または オーディオモードが有効の場合、\n音声設定は不要です。"
 }
diff --git a/locales/ko/translation.json b/locales/ko/translation.json
index 039a1ac..0235900 100644
--- a/locales/ko/translation.json
+++ b/locales/ko/translation.json
@@ -197,5 +197,6 @@
     "useCurrentSystemPrompt": "AITuberKit의 시스템 프롬프트를 사용하기",
     "userInputTitle": "사용자 입력 보내기",
     "userInputDescription": "보낸 메시지는 AITuberKit의 입력 폼에서 입력한 경우와 동일한 처리가 됩니다. 여러 메시지를 보낸 경우 순서대로 처리됩니다.\nAI 모델과 음성 모델은 AITuberKit의 설정에서 선택한 것이 사용됩니다.\n시스템 프롬프트와 대화 기록은 AITuberKit의 값이 사용됩니다."
-  }
+  },
+  "CannotUseVoice": "리얼타임 API 모드 또는 오디오 모드가 활성화된 경우, 음성 설정은 필요하지 않습니다."
 }
diff --git a/locales/zh/translation.json b/locales/zh/translation.json
index 8de358f..a2e5ad0 100644
--- a/locales/zh/translation.json
+++ b/locales/zh/translation.json
@@ -197,5 +197,6 @@
     "useCurrentSystemPrompt": "使用AITuberKit的系統提示",
     "userInputTitle": "用戶輸入發送",
     "userInputDescription": "發送的訊息將與AITuberKit的輸入框中輸入的訊息相同。如果發送多個訊息，將按順序處理。\nAI模型和語音模型將使用AITuberKit的設定中選擇的模型。\n系統提示和會話記錄將使用AITuberKit的值。"
-  }
+  },
+  "CannotUseVoice": "實時API模式或音訊模式已啟用，因此不需要語音設定。"
 }
diff --git a/src/components/settings/modelProvider.tsx b/src/components/settings/modelProvider.tsx
index cdddb09..67d82ad 100644
--- a/src/components/settings/modelProvider.tsx
+++ b/src/components/settings/modelProvider.tsx
@@ -269,7 +269,7 @@ const ModelProvider = () => {
                       <option value="input_audio">{t('InputAudio')}</option>
                     </select>
                     <div className="my-16 font-bold">
-                      {t('realtimeAPIModeVoice')}
+                      {t('RealtimeAPIModeVoice')}
                     </div>
                     <select
                       className="px-16 py-8 w-col-span-2 bg-surface1 hover:bg-surface1-hover rounded-8"
diff --git a/src/components/settings/voice.tsx b/src/components/settings/voice.tsx
index 1897c27..d381566 100644
--- a/src/components/settings/voice.tsx
+++ b/src/components/settings/voice.tsx
@@ -24,6 +24,9 @@ const Voice = () => {
   const koeiromapKey = settingsStore((s) => s.koeiromapKey)
   const elevenlabsApiKey = settingsStore((s) => s.elevenlabsApiKey)
 
+  const realtimeAPIMode = settingsStore((s) => s.realtimeAPIMode)
+  const audioMode = settingsStore((s) => s.audioMode)
+
   const selectVoice = settingsStore((s) => s.selectVoice)
   const koeiroParam = settingsStore((s) => s.koeiroParam)
   const googleTtsType = settingsStore((s) => s.googleTtsType)
@@ -86,6 +89,15 @@ const Voice = () => {
     }
   }, [selectVoice, nijivoiceApiKey])
 
+  // 追加: realtimeAPIMode または audioMode が true の場合にメッセージを表示
+  if (realtimeAPIMode || audioMode) {
+    return (
+      <div className="text-center typography-20 whitespace-pre-line">
+        {t('CannotUseVoice')}
+      </div>
+    )
+  }
+
   return (
     <div className="">
       <div className="mb-16 typography-20 font-bold">

From 684f663ccfdd638c7be7d80f8944843ed8ee5e9d Mon Sep 17 00:00:00 2001
From: tegnike <o3nike.teg.14@gmail.com>
Date: Thu, 12 Dec 2024 23:13:01 +0100
Subject: [PATCH 10/14] =?UTF-8?q?=E3=83=A6=E3=83=BC=E3=82=B6=E3=83=A1?=
 =?UTF-8?q?=E3=83=83=E3=82=BB=E3=83=BC=E3=82=B8=E3=81=AB=E3=82=BF=E3=82=A4?=
 =?UTF-8?q?=E3=83=A0=E3=82=B9=E3=82=BF=E3=83=B3=E3=83=97=E3=82=92=E5=8A=A0?=
 =?UTF-8?q?=E3=81=88=E3=82=8B=E3=82=AA=E3=83=97=E3=82=B7=E3=83=A7=E3=83=B3?=
 =?UTF-8?q?=E3=81=AE=E4=B8=8D=E5=85=B7=E5=90=88=E4=BF=AE=E6=AD=A3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .env                                      |  3 +++
 .env.example                              |  3 +++
 locales/en/translation.json               |  2 +-
 locales/ja/translation.json               |  2 +-
 locales/ko/translation.json               |  2 +-
 locales/zh/translation.json               |  2 +-
 src/features/messages/messageSelectors.ts | 24 ++++++++++++++++-------
 7 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/.env b/.env
index 41cdc83..c10fec4 100644
--- a/.env
+++ b/.env
@@ -144,3 +144,6 @@ NEXT_PUBLIC_SYSTEM_PROMPT=""
 
 # Selected VRM
 NEXT_PUBLIC_SELECTED_VRM_PATH=""
+
+# Whether to include timestamp in user message (true or false)
+NEXT_PUBLIC_INCLUDE_TIMESTAMP_IN_USER_MESSAGE=""
diff --git a/.env.example b/.env.example
index 6da7acb..3a3d600 100644
--- a/.env.example
+++ b/.env.example
@@ -135,3 +135,6 @@ NEXT_PUBLIC_SYSTEM_PROMPT=""
 
 # Selected VRM
 NEXT_PUBLIC_SELECTED_VRM_PATH=""
+
+# Whether to include timestamp in user message (true or false)
+NEXT_PUBLIC_INCLUDE_TIMESTAMP_IN_USER_MESSAGE=""
diff --git a/locales/en/translation.json b/locales/en/translation.json
index 54c65a2..b67a929 100644
--- a/locales/en/translation.json
+++ b/locales/en/translation.json
@@ -75,7 +75,7 @@
   "StyleBertVITS2Info": "Using Style-Bert-VITS2. It supports only Japanese, English, and Chinese. If using a local API, you need to download and launch the app that suits your environment from the site below. Please also set up an API key if necessary.",
   "SpeakerSelection": "Speaker Selection",
   "IncludeTimestampInUserMessage": "Include timestamp in user message",
-  "IncludeTimestampInUserMessageInfo": "Including a timestamp in user messages allows AI to generate responses considering time. Please include the following text in the system prompt.\n\n「The user input may be requested with a timestamp. This represents the time at the request, so please generate a response considering that time.」",
+  "IncludeTimestampInUserMessageInfo": "By including timestamps in user messages, the AI can generate responses while considering the time.\nPlease include the following text in your system prompt:\n\n\"User input may include [timestamp]. This represents the UTC time at the moment of the request, so please generate responses considering this timestamp.\"",
   "GoogleTTSInfo": "Using Google Cloud Text-to-Speech. It supports multiple languages.",
   "AuthFileInstruction": "Obtain the authentication JSON file below and place it in the root folder of the repository as 'credentials.json'.",
   "LanguageModelURL": "Select the language model from the URL below.",
diff --git a/locales/ja/translation.json b/locales/ja/translation.json
index 71e61ef..b28b16e 100644
--- a/locales/ja/translation.json
+++ b/locales/ja/translation.json
@@ -76,7 +76,7 @@
   "SpeakerSelection": "ボイスタイプ選択",
   "EnglishToJapanese": "英単語を日本語で読み上げる",
   "IncludeTimestampInUserMessage": "ユーザー発言にタイムスタンプを含める",
-  "IncludeTimestampInUserMessageInfo": "ユーザー発言にタイムスタンプを含めることで、AIが時間を考慮して応答を生成できるようになります。\n以下のような文章をシステムプロンプトに含めてください。\n\n「ユーザー入力が [timestamp] 付きでリクエストされる場合があります。これはリクエスト時点の時刻を表しているので、その時刻を考慮したうえで回答を生成してください。」",
+  "IncludeTimestampInUserMessageInfo": "ユーザー発言にタイムスタンプを含めることで、AIが時間を考慮して応答を生成できるようになります。\n以下のような文章をシステムプロンプトに含めてください。\n\n「ユーザー入力が [timestamp] 付きでリクエストされる場合があります。これはリクエスト時点のUTCタイムゾーンの時刻を表しているので、その時刻を考慮したうえで回答を生成してください。」",
   "GoogleTTSInfo": "Google Cloud Text-to-Speechを使用しています。多言語に対応可能です。",
   "AuthFileInstruction": "認証用のJSONファイルを下記から取得し、リポジトリのルートフォルダに credentials.json という名称で配置してください。",
   "LanguageModelURL": "言語モデルは下記のURLから選択してください。",
diff --git a/locales/ko/translation.json b/locales/ko/translation.json
index 0235900..4cef676 100644
--- a/locales/ko/translation.json
+++ b/locales/ko/translation.json
@@ -75,7 +75,7 @@
   "StyleBertVITS2Info": "Style-Bert-VITS2를 사용하고 있습니다. 일본어, 영어, 중국어만 지원됩니다. 로컬 API를 사용하는 경우 아래 사이트에서 환경에 맞는 앱을 다운로드하고 실행해야 합니다. 필요한 경우 API 키도 설정해 주세요.",
   "SpeakerSelection": "보이스 타입 선택",
   "IncludeTimestampInUserMessage": "사용자 발언에 타임스탬프 포함",
-  "IncludeTimestampInUserMessageInfo": "사용자 발언에 타임스탬프를 포함하면 AI가 시간을 고려하여 응답을 생성할 수 있습니다.\n다음과 같은 문장을 시스템 프롬프트에 포함해 주세요.\n\n「사용자 입력이 [timestamp]가 포함된 요청인 경우가 있습니다. 이는 요청 시점의 시각을 나타내므로, 해당 시각을 고려하여 답변을 생성해 주세요.」",
+  "IncludeTimestampInUserMessageInfo": "사용자 발언에 타임스탬프를 포함하면 AI가 시간을 고려하여 응답을 생성할 수 있습니다.\n다음과 같은 문장을 시스템 프롬프트에 포함해 주세요.\n\n「사용자 입력에 [timestamp]가 포함되어 요청되는 경우가 있습니다. 이는 요청 시점의 UTC 타임존 시각을 나타내므로, 해당 시각을 고려하여 응답을 생성해 주세요.」",
   "GoogleTTSInfo": "Google Cloud Text-to-Speech를 사용합니다. 다국어 지원이 가능합니다.",
   "AuthFileInstruction": "인증용 JSON 파일을 아래에서 얻어 리포지토리 루트 폴더에 credentials.json이라는 이름으로 배치하십시오.",
   "LanguageModelURL": "언어 모델은 아래 URL에서 선택하십시오.",
diff --git a/locales/zh/translation.json b/locales/zh/translation.json
index a2e5ad0..6e06dc6 100644
--- a/locales/zh/translation.json
+++ b/locales/zh/translation.json
@@ -75,7 +75,7 @@
   "StyleBertVITS2Info": "使用 Style-Bert-VITS2。僅支援日語、英語和中文。如果使用本地 API，您需要從以下網站下載並啟動適合您環境的應用程式。如有需要，請也設定 API 金鑰。",
   "SpeakerSelection": "選擇語音角色",
   "IncludeTimestampInUserMessage": "在用戶訊息中包含時間戳記",
-  "IncludeTimestampInUserMessageInfo": "在用戶訊息中包含時間戳記，可以讓 AI 根據時間生成回應。\n請在系統提示中包含以下文字：\n\n「用戶輸入可能會包含 [timestamp] 時間戳記。這代表請求時的時間，請根據該時間生成回應。」",
+  "IncludeTimestampInUserMessageInfo": "在用戶訊息中包含時間戳記，可以讓 AI 在生成回應時考慮時間因素。\n請在系統提示中包含以下文字：\n\n「用戶輸入可能會包含 [timestamp] 時間戳記。這表示請求時的 UTC 時區時間，請在生成回應時將此時間納入考慮。」",
   "GoogleTTSInfo": "使用 Google Cloud 文字轉語音。支援多種語言。",
   "AuthFileInstruction": "在下方獲取認證 JSON 檔案，並將其放置於儲存庫的根目錄下，命名為 'credentials.json'。",
   "LanguageModelURL": "從下方 URL 選擇語言模型。",
diff --git a/src/features/messages/messageSelectors.ts b/src/features/messages/messageSelectors.ts
index a7d41a8..2c0cb3b 100644
--- a/src/features/messages/messageSelectors.ts
+++ b/src/features/messages/messageSelectors.ts
@@ -34,16 +34,26 @@ export const messageSelectors = {
   ): Message[] => {
     return messages
       .map((message, index) => {
+        // 最後のメッセージだけそのまま利用する（= 最後のメッセージだけマルチモーダルの対象となる）
         const isLastMessage = index === messages.length - 1
-        const messageContent = Array.isArray(message.content)
+        const messageText = Array.isArray(message.content)
           ? message.content[0].text
-          : message.content
+          : message.content || ''
 
-        const content = includeTimestamp
-          ? `[${message.timestamp}] ${isLastMessage ? message.content : messageContent}`
-          : isLastMessage
-            ? message.content
-            : messageContent
+        let content: Message['content']
+        if (includeTimestamp) {
+          content = message.timestamp
+            ? `[${message.timestamp}] ${messageText}`
+            : messageText
+          if (isLastMessage && Array.isArray(message.content)) {
+            content = [
+              { type: 'text', text: content },
+              { type: 'image', image: message.content[1].image },
+            ]
+          }
+        } else {
+          content = isLastMessage ? message.content : messageText
+        }
 
         return {
           role: ['assistant', 'user', 'system'].includes(message.role)

From 8c70b66550846ae264b3978bb9f9b7289e36f1bb Mon Sep 17 00:00:00 2001
From: tegnike <o3nike.teg.14@gmail.com>
Date: Thu, 12 Dec 2024 23:26:02 +0100
Subject: [PATCH 11/14] =?UTF-8?q?=E3=82=AB=E3=83=A1=E3=83=A9=E8=B5=B7?=
 =?UTF-8?q?=E5=8B=95=E6=99=82=E3=81=AB=E6=B7=BB=E4=BB=98=E7=94=BB=E5=83=8F?=
 =?UTF-8?q?=E3=81=8C=E3=81=82=E3=82=8B=E3=81=A82=E5=BA=A6=E9=80=81?=
 =?UTF-8?q?=E4=BF=A1=E3=81=95=E3=82=8C=E3=81=A6=E3=81=97=E3=81=BE=E3=81=86?=
 =?UTF-8?q?=E4=B8=8D=E5=85=B7=E5=90=88=E3=82=92=E4=BF=AE=E6=AD=A3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/components/form.tsx | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/components/form.tsx b/src/components/form.tsx
index fd349d3..213947b 100644
--- a/src/components/form.tsx
+++ b/src/components/form.tsx
@@ -28,7 +28,10 @@ export const Form = () => {
 
   const hookSendChat = useCallback(
     (text: string) => {
-      homeStore.setState({ triggerShutter: true })
+      // すでにmodalImageが存在する場合は、Webcamのキャプチャーをスキップ
+      if (!homeStore.getState().modalImage) {
+        homeStore.setState({ triggerShutter: true })
+      }
 
       // MENUの中でshowCameraがtrueの場合、画像が取得されるまで待機
       if (webcamStatus || captureStatus) {

From b0e87619b4695d9ee9be7a7fba2f129f4acb6de8 Mon Sep 17 00:00:00 2001
From: tegnike <o3nike.teg.14@gmail.com>
Date: Thu, 12 Dec 2024 23:34:17 +0100
Subject: [PATCH 12/14] =?UTF-8?q?Issue=E7=94=A8=E3=82=A2=E3=83=8A=E3=83=A9?=
 =?UTF-8?q?=E3=82=A4=E3=82=B6=E3=82=92=E4=B8=80=E6=97=A6=E9=9D=9E=E6=B4=BB?=
 =?UTF-8?q?=E6=80=A7?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/workflows/issue-analyzer.yml | 44 ----------------------------
 1 file changed, 44 deletions(-)
 delete mode 100644 .github/workflows/issue-analyzer.yml

diff --git a/.github/workflows/issue-analyzer.yml b/.github/workflows/issue-analyzer.yml
deleted file mode 100644
index a41371d..0000000
--- a/.github/workflows/issue-analyzer.yml
+++ /dev/null
@@ -1,44 +0,0 @@
-# name: Issue Analyzer
-
-# on:
-#   issues:
-#     types: [opened]
-#   workflow_dispatch:
-#     inputs:
-#       issue_number:
-#         description: 'Issue Number'
-#         required: true
-#       issue_title:
-#         description: 'Issue Title'
-#         required: true
-#       issue_body:
-#         description: 'Issue Body'
-#         required: true
-
-# jobs:
-#   analyze-issue:
-#     runs-on: ubuntu-latest
-#     permissions:
-#       issues: write
-#     steps:
-#       - name: Checkout repository
-#         uses: actions/checkout@v2
-
-#       - name: Set up Python
-#         uses: actions/setup-python@v2
-#         with:
-#           python-version: '3.x'
-
-#       - name: Install dependencies
-#         run: |
-#           python -m pip install --upgrade pip
-#           pip install requests anthropic
-
-#       - name: Analyze issue
-#         env:
-#           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-#           ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
-#           ISSUE_NUMBER: ${{ github.event.inputs.issue_number || github.event.issue.number }}
-#           ISSUE_TITLE: ${{ github.event.inputs.issue_title || github.event.issue.title }}
-#           ISSUE_BODY: ${{ github.event.inputs.issue_body || github.event.issue.body }}
-#         run: python scripts/analyze_issue.py

From 9afc6413b8c8569caa6a6fe6a6a77e16ec512d7c Mon Sep 17 00:00:00 2001
From: tegnike <o3nike.teg.14@gmail.com>
Date: Fri, 13 Dec 2024 17:36:49 +0100
Subject: [PATCH 13/14] =?UTF-8?q?=E9=9F=B3=E5=A3=B0=E3=82=92=E7=9B=B4?=
 =?UTF-8?q?=E5=88=97=E5=87=A6=E7=90=86=E3=81=AB=E6=88=BB=E3=81=99?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/features/messages/speakCharacter.ts | 203 +++++++++++++-----------
 src/features/messages/speakQueue.ts     |  22 +--
 2 files changed, 115 insertions(+), 110 deletions(-)

diff --git a/src/features/messages/speakCharacter.ts b/src/features/messages/speakCharacter.ts
index 78c3d8a..9a7fc42 100644
--- a/src/features/messages/speakCharacter.ts
+++ b/src/features/messages/speakCharacter.ts
@@ -1,6 +1,7 @@
 import homeStore from '@/features/stores/home'
 import settingsStore from '@/features/stores/settings'
 import englishToJapanese from '@/utils/englishToJapanese.json'
+import { wait } from '@/utils/wait'
 import { Talk } from './messages'
 import { synthesizeStyleBertVITS2Api } from './synthesizeStyleBertVITS2'
 import { synthesizeVoiceKoeiromapApi } from './synthesizeVoiceKoeiromap'
@@ -47,6 +48,9 @@ function preprocessMessage(
 }
 
 const createSpeakCharacter = () => {
+  let lastTime = 0
+  let prevFetchPromise: Promise<unknown> = Promise.resolve()
+
   return (talk: Talk, onStart?: () => void, onComplete?: () => void) => {
     const ss = settingsStore.getState()
     onStart?.()
@@ -62,110 +66,117 @@ const createSpeakCharacter = () => {
 
     let isNeedDecode = true
 
-    // audioBufferのフェッチを開始
-    const audioBufferPromise: Promise<ArrayBuffer | null> = (async () => {
+    const fetchPromise = prevFetchPromise.then(async () => {
+      const now = Date.now()
+      if (now - lastTime < 1000) {
+        await wait(1000 - (now - lastTime))
+      }
+
+      let buffer
       try {
-        if (talk.message === '' && talk.buffer) {
+        if (talk.message == '' && talk.buffer) {
+          buffer = talk.buffer
           isNeedDecode = false
-          return talk.buffer
         } else if (ss.audioMode) {
-          return null
-        }
-
-        // 選択されたボイスに応じたTTS APIを呼び出す
-        switch (ss.selectVoice) {
-          case 'koeiromap':
-            return await synthesizeVoiceKoeiromapApi(
-              talk,
-              ss.koeiromapKey,
-              ss.koeiroParam
-            )
-          case 'voicevox':
-            return await synthesizeVoiceVoicevoxApi(
-              talk,
-              ss.voicevoxSpeaker,
-              ss.voicevoxSpeed,
-              ss.voicevoxPitch,
-              ss.voicevoxIntonation
-            )
-          case 'google':
-            return await synthesizeVoiceGoogleApi(
-              talk,
-              ss.googleTtsType,
-              ss.selectLanguage
-            )
-          case 'stylebertvits2':
-            return await synthesizeStyleBertVITS2Api(
-              talk,
-              ss.stylebertvits2ServerUrl,
-              ss.stylebertvits2ApiKey,
-              ss.stylebertvits2ModelId,
-              ss.stylebertvits2Style,
-              ss.stylebertvits2SdpRatio,
-              ss.stylebertvits2Length,
-              ss.selectLanguage
-            )
-          case 'aivis_speech':
-            return await synthesizeVoiceAivisSpeechApi(
-              talk,
-              ss.aivisSpeechSpeaker,
-              ss.aivisSpeechSpeed,
-              ss.aivisSpeechPitch,
-              ss.aivisSpeechIntonation
-            )
-          case 'gsvitts':
-            return await synthesizeVoiceGSVIApi(
-              talk,
-              ss.gsviTtsServerUrl,
-              ss.gsviTtsModelId,
-              ss.gsviTtsBatchSize,
-              ss.gsviTtsSpeechRate
-            )
-          case 'elevenlabs':
-            return await synthesizeVoiceElevenlabsApi(
-              talk,
-              ss.elevenlabsApiKey,
-              ss.elevenlabsVoiceId,
-              ss.selectLanguage
-            )
-          case 'openai':
-            return await synthesizeVoiceOpenAIApi(
-              talk,
-              ss.openaiTTSKey || ss.openaiKey,
-              ss.openaiTTSVoice,
-              ss.openaiTTSModel,
-              ss.openaiTTSSpeed
-            )
-          case 'azure':
-            return await synthesizeVoiceAzureOpenAIApi(
-              talk,
-              ss.azureTTSKey || ss.azureKey,
-              ss.azureTTSEndpoint || ss.azureEndpoint,
-              ss.openaiTTSVoice,
-              ss.openaiTTSSpeed
-            )
-          case 'nijivoice':
-            return await synthesizeVoiceNijivoiceApi(
-              talk,
-              ss.nijivoiceApiKey,
-              ss.nijivoiceActorId,
-              ss.nijivoiceSpeed
-            )
-          default:
-            throw new Error('Unsupported voice type')
+          buffer = null
+        } else if (ss.selectVoice == 'koeiromap') {
+          buffer = await synthesizeVoiceKoeiromapApi(
+            talk,
+            ss.koeiromapKey,
+            ss.koeiroParam
+          )
+        } else if (ss.selectVoice == 'voicevox') {
+          buffer = await synthesizeVoiceVoicevoxApi(
+            talk,
+            ss.voicevoxSpeaker,
+            ss.voicevoxSpeed,
+            ss.voicevoxPitch,
+            ss.voicevoxIntonation
+          )
+        } else if (ss.selectVoice == 'google') {
+          buffer = await synthesizeVoiceGoogleApi(
+            talk,
+            ss.googleTtsType,
+            ss.selectLanguage
+          )
+        } else if (ss.selectVoice == 'stylebertvits2') {
+          buffer = await synthesizeStyleBertVITS2Api(
+            talk,
+            ss.stylebertvits2ServerUrl,
+            ss.stylebertvits2ApiKey,
+            ss.stylebertvits2ModelId,
+            ss.stylebertvits2Style,
+            ss.stylebertvits2SdpRatio,
+            ss.stylebertvits2Length,
+            ss.selectLanguage
+          )
+        } else if (ss.selectVoice == 'aivis_speech') {
+          buffer = await synthesizeVoiceAivisSpeechApi(
+            talk,
+            ss.aivisSpeechSpeaker,
+            ss.aivisSpeechSpeed,
+            ss.aivisSpeechPitch,
+            ss.aivisSpeechIntonation
+          )
+        } else if (ss.selectVoice == 'gsvitts') {
+          buffer = await synthesizeVoiceGSVIApi(
+            talk,
+            ss.gsviTtsServerUrl,
+            ss.gsviTtsModelId,
+            ss.gsviTtsBatchSize,
+            ss.gsviTtsSpeechRate
+          )
+        } else if (ss.selectVoice == 'elevenlabs') {
+          buffer = await synthesizeVoiceElevenlabsApi(
+            talk,
+            ss.elevenlabsApiKey,
+            ss.elevenlabsVoiceId,
+            ss.selectLanguage
+          )
+        } else if (ss.selectVoice == 'openai') {
+          buffer = await synthesizeVoiceOpenAIApi(
+            talk,
+            ss.openaiTTSKey || ss.openaiKey,
+            ss.openaiTTSVoice,
+            ss.openaiTTSModel,
+            ss.openaiTTSSpeed
+          )
+        } else if (ss.selectVoice == 'azure') {
+          buffer = await synthesizeVoiceAzureOpenAIApi(
+            talk,
+            ss.azureTTSKey || ss.azureKey,
+            ss.azureTTSEndpoint || ss.azureEndpoint,
+            ss.openaiTTSVoice,
+            ss.openaiTTSSpeed
+          )
+        } else if (ss.selectVoice == 'nijivoice') {
+          buffer = await synthesizeVoiceNijivoiceApi(
+            talk,
+            ss.nijivoiceApiKey,
+            ss.nijivoiceActorId,
+            ss.nijivoiceSpeed
+          )
         }
       } catch (error) {
         handleTTSError(error, ss.selectVoice)
         return null
       }
-    })()
-
-    // タスクをSpeakQueueに追加
-    speakQueue.addTask({
-      audioBufferPromise,
-      talk,
-      isNeedDecode,
-      onComplete,
+      lastTime = Date.now()
+      return buffer
+    })
+
+    prevFetchPromise = fetchPromise
+
+    // キューを使用した処理に変更
+    fetchPromise.then((audioBuffer) => {
+      if (!audioBuffer) return
+
+      speakQueue.addTask({
+        audioBuffer,
+        talk,
+        isNeedDecode,
+        onComplete,
+      })
     })
   }
 }
diff --git a/src/features/messages/speakQueue.ts b/src/features/messages/speakQueue.ts
index a7aa26b..fbc18b9 100644
--- a/src/features/messages/speakQueue.ts
+++ b/src/features/messages/speakQueue.ts
@@ -1,8 +1,8 @@
 import { Talk } from './messages'
 import homeStore from '@/features/stores/home'
 
-type SpeakTaskWithPromise = {
-  audioBufferPromise: Promise<ArrayBuffer | null>
+type SpeakTask = {
+  audioBuffer: ArrayBuffer
   talk: Talk
   isNeedDecode: boolean
   onComplete?: () => void
@@ -10,12 +10,12 @@ type SpeakTaskWithPromise = {
 
 export class SpeakQueue {
   private static readonly QUEUE_CHECK_DELAY = 1500
-  private queue: SpeakTaskWithPromise[] = []
+  private queue: SpeakTask[] = []
   private isProcessing = false
 
-  addTask(task: SpeakTaskWithPromise) {
+  async addTask(task: SpeakTask) {
     this.queue.push(task)
-    this.processQueue()
+    await this.processQueue()
   }
 
   private async processQueue() {
@@ -27,15 +27,9 @@ export class SpeakQueue {
       const task = this.queue.shift()
       if (task) {
         try {
-          const audioBuffer = await task.audioBufferPromise
-          if (audioBuffer) {
-            await hs.viewer.model?.speak(
-              audioBuffer,
-              task.talk,
-              task.isNeedDecode
-            )
-            task.onComplete?.()
-          }
+          const { audioBuffer, talk, isNeedDecode, onComplete } = task
+          await hs.viewer.model?.speak(audioBuffer, talk, isNeedDecode)
+          onComplete?.()
         } catch (error) {
           console.error(
             'An error occurred while processing the speech synthesis task:',

From 6f87f5fd53b1486e813ad75aea65fd73dcc25d27 Mon Sep 17 00:00:00 2001
From: tegnike <o3nike.teg.14@gmail.com>
Date: Fri, 13 Dec 2024 17:46:04 +0100
Subject: [PATCH 14/14] =?UTF-8?q?=E3=83=90=E3=83=BC=E3=82=B8=E3=83=A7?=
 =?UTF-8?q?=E3=83=B3=E6=9B=B4=E6=96=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/components/settings/index.tsx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/components/settings/index.tsx b/src/components/settings/index.tsx
index f67c4e2..1a9f9db 100644
--- a/src/components/settings/index.tsx
+++ b/src/components/settings/index.tsx
@@ -126,7 +126,7 @@ const Main = () => {
 const Footer = () => {
   return (
     <footer className="absolute py-4 bg-[#413D43] text-center text-white font-Montserrat bottom-0 w-full">
-      powered by ChatVRM from Pixiv. version 2.16.0
+      powered by ChatVRM from Pixiv. version 2.17.0
     </footer>
   )
 }