From 0db2e57f21bb983a3e64e740c08611f663a87919 Mon Sep 17 00:00:00 2001 From: Kodai Aoyama Date: Fri, 8 Nov 2024 23:24:52 +0900 Subject: [PATCH 1/3] refactor: rename is_continuous flag for better clarity of termination mode --- src-tauri/src/module/chat_online.rs | 4 ++-- src-tauri/src/module/record.rs | 16 ++++++++++++++-- src-tauri/src/module/record_desktop.rs | 12 +++++++++++- src-tauri/src/module/transcription.rs | 4 ++-- src-tauri/src/module/transcription_amivoice.rs | 4 ++-- src-tauri/src/module/transcription_ja.rs | 4 ++-- src-tauri/src/module/transcription_online.rs | 4 ++-- src-tauri/src/module/translation_en.rs | 4 ++-- src-tauri/src/module/translation_ja.rs | 4 ++-- src-tauri/src/module/translation_ja_high.rs | 4 ++-- 10 files changed, 41 insertions(+), 19 deletions(-) diff --git a/src-tauri/src/module/chat_online.rs b/src-tauri/src/module/chat_online.rs index 9a58dc6..e10c7d4 100644 --- a/src-tauri/src/module/chat_online.rs +++ b/src-tauri/src/module/chat_online.rs @@ -36,9 +36,9 @@ impl ChatOnline { } } - pub fn start(&mut self, stop_convert_rx: Receiver<()>, is_continuous: bool) { + pub fn start(&mut self, stop_convert_rx: Receiver<()>, use_no_vosk_queue_terminate_mode: bool) { while Self::convert(self).is_ok() { - if is_continuous { + if use_no_vosk_queue_terminate_mode { let vosk_speech = self.sqlite.select_vosk(self.note_id); if vosk_speech.is_err() { self.app_handle diff --git a/src-tauri/src/module/record.rs b/src-tauri/src/module/record.rs index 7209d52..4872af6 100644 --- a/src-tauri/src/module/record.rs +++ b/src-tauri/src/module/record.rs @@ -24,8 +24,8 @@ use tauri::{api::path::data_dir, AppHandle, Manager}; use super::{ chat_online, recognizer::MyRecognizer, sqlite::Sqlite, transcription, transcription_amivoice, - transcription_ja, transcription_online, translation_en, translation_ja, translation_ja_high, - writer::Writer, + transcription_hybrid, transcription_ja, transcription_online, translation_en, translation_ja, + translation_ja_high, writer::Writer, }; pub struct Record { @@ -258,6 +258,17 @@ impl Record { if let Some(singleton) = lock.as_mut() { singleton.start(stop_convert_rx_clone, false); } + } else if transcription_accuracy_clone.starts_with("hybrid-transcript") + { + transcription_hybrid::initialize_transcription_hybrid( + app_handle_clone, + note_id, + ); + let mut lock = + transcription_hybrid::SINGLETON_INSTANCE.lock().unwrap(); + if let Some(singleton) = lock.as_mut() { + singleton.start(stop_convert_rx_clone, false); + } } else { transcription::initialize_transcription( app_handle_clone, @@ -300,6 +311,7 @@ impl Record { translation_ja_high::drop_translation_ja_high(); transcription_online::drop_transcription_online(); transcription_amivoice::drop_transcription_amivoice(); + transcription_hybrid::drop_transcription_hybrid(); chat_online::drop_chat_online(); } else { drop(stop_convert_tx) diff --git a/src-tauri/src/module/record_desktop.rs b/src-tauri/src/module/record_desktop.rs index 8f3eb63..dba6722 100644 --- a/src-tauri/src/module/record_desktop.rs +++ b/src-tauri/src/module/record_desktop.rs @@ -38,7 +38,7 @@ use screencapturekit::{ use vosk::Recognizer; use super::{ - chat_online, recognizer::MyRecognizer, sqlite::Sqlite, transcription, transcription_amivoice, transcription_ja, transcription_online, translation_en, translation_ja, translation_ja_high, writer::Writer + chat_online, recognizer::MyRecognizer, sqlite::Sqlite, transcription, transcription_amivoice, transcription_hybrid, transcription_ja, transcription_online, translation_en, 
translation_ja, translation_ja_high, writer::Writer }; pub struct RecordDesktop { @@ -288,6 +288,15 @@ impl RecordDesktop { if let Some(singleton) = lock.as_mut() { singleton.start(stop_convert_rx_clone, false); } + } else if transcription_accuracy_clone.starts_with("hybrid-transcript") { + transcription_hybrid::initialize_transcription_hybrid( + app_handle_clone, + note_id, + ); + let mut lock = transcription_hybrid::SINGLETON_INSTANCE.lock().unwrap(); + if let Some(singleton) = lock.as_mut() { + singleton.start(stop_convert_rx_clone, false); + } } else { transcription::initialize_transcription( app_handle_clone, @@ -334,6 +343,7 @@ impl RecordDesktop { translation_ja_high::drop_translation_ja_high(); transcription_online::drop_transcription_online(); transcription_amivoice::drop_transcription_amivoice(); + transcription_hybrid::drop_transcription_hybrid(); chat_online::drop_chat_online(); } else { drop(stop_convert_tx) diff --git a/src-tauri/src/module/transcription.rs b/src-tauri/src/module/transcription.rs index 5ea43ea..a49a32f 100644 --- a/src-tauri/src/module/transcription.rs +++ b/src-tauri/src/module/transcription.rs @@ -37,9 +37,9 @@ impl Transcription { } } - pub fn start(&mut self, stop_convert_rx: Receiver<()>, is_continuous: bool) { + pub fn start(&mut self, stop_convert_rx: Receiver<()>, use_no_vosk_queue_terminate_mode: bool) { while Self::convert(self).is_ok() { - if is_continuous { + if use_no_vosk_queue_terminate_mode { let vosk_speech = self.sqlite.select_vosk(self.note_id); if vosk_speech.is_err() { self.app_handle diff --git a/src-tauri/src/module/transcription_amivoice.rs b/src-tauri/src/module/transcription_amivoice.rs index 0b04669..a4be598 100644 --- a/src-tauri/src/module/transcription_amivoice.rs +++ b/src-tauri/src/module/transcription_amivoice.rs @@ -39,9 +39,9 @@ impl TranscriptionAmivoice { } } - pub fn start(&mut self, stop_convert_rx: Receiver<()>, is_continuous: bool) { + pub fn start(&mut self, stop_convert_rx: Receiver<()>, use_no_vosk_queue_terminate_mode: bool) { while Self::convert(self).is_ok() { - if is_continuous { + if use_no_vosk_queue_terminate_mode { let vosk_speech = self.sqlite.select_vosk(self.note_id); if vosk_speech.is_err() { self.app_handle diff --git a/src-tauri/src/module/transcription_ja.rs b/src-tauri/src/module/transcription_ja.rs index dbb8b0d..e415bae 100644 --- a/src-tauri/src/module/transcription_ja.rs +++ b/src-tauri/src/module/transcription_ja.rs @@ -41,9 +41,9 @@ impl TranscriptionJa { } } - pub fn start(&mut self, stop_convert_rx: Receiver<()>, is_continuous: bool) { + pub fn start(&mut self, stop_convert_rx: Receiver<()>, use_no_vosk_queue_terminate_mode: bool) { while Self::convert(self).is_ok() { - if is_continuous { + if use_no_vosk_queue_terminate_mode { let vosk_speech = self.sqlite.select_vosk(self.note_id); if vosk_speech.is_err() { self.app_handle diff --git a/src-tauri/src/module/transcription_online.rs b/src-tauri/src/module/transcription_online.rs index d46a2b3..00c044c 100644 --- a/src-tauri/src/module/transcription_online.rs +++ b/src-tauri/src/module/transcription_online.rs @@ -43,9 +43,9 @@ impl TranscriptionOnline { } } - pub fn start(&mut self, stop_convert_rx: Receiver<()>, is_continuous: bool) { + pub fn start(&mut self, stop_convert_rx: Receiver<()>, use_no_vosk_queue_terminate_mode: bool) { while Self::convert(self).is_ok() { - if is_continuous { + if use_no_vosk_queue_terminate_mode { let vosk_speech = self.sqlite.select_vosk(self.note_id); if vosk_speech.is_err() { self.app_handle diff --git 
a/src-tauri/src/module/translation_en.rs b/src-tauri/src/module/translation_en.rs
index 92325c6..628a9e4 100644
--- a/src-tauri/src/module/translation_en.rs
+++ b/src-tauri/src/module/translation_en.rs
@@ -38,9 +38,9 @@ impl TranslationEn {
         }
     }
 
-    pub fn start(&mut self, stop_convert_rx: Receiver<()>, is_continuous: bool) {
+    pub fn start(&mut self, stop_convert_rx: Receiver<()>, use_no_vosk_queue_terminate_mode: bool) {
         while Self::convert(self).is_ok() {
-            if is_continuous {
+            if use_no_vosk_queue_terminate_mode {
                 let vosk_speech = self.sqlite.select_vosk(self.note_id);
                 if vosk_speech.is_err() {
                     self.app_handle
diff --git a/src-tauri/src/module/translation_ja.rs b/src-tauri/src/module/translation_ja.rs
index 0c0656b..7bf9adb 100644
--- a/src-tauri/src/module/translation_ja.rs
+++ b/src-tauri/src/module/translation_ja.rs
@@ -40,9 +40,9 @@ impl TranslationJa {
         }
     }
 
-    pub fn start(&mut self, stop_convert_rx: Receiver<()>, is_continuous: bool) {
+    pub fn start(&mut self, stop_convert_rx: Receiver<()>, use_no_vosk_queue_terminate_mode: bool) {
         while Self::convert(self).is_ok() {
-            if is_continuous {
+            if use_no_vosk_queue_terminate_mode {
                 let vosk_speech = self.sqlite.select_vosk(self.note_id);
                 if vosk_speech.is_err() {
                     self.app_handle
diff --git a/src-tauri/src/module/translation_ja_high.rs b/src-tauri/src/module/translation_ja_high.rs
index cc7c8ee..9d4fcd4 100644
--- a/src-tauri/src/module/translation_ja_high.rs
+++ b/src-tauri/src/module/translation_ja_high.rs
@@ -85,9 +85,9 @@ impl TranslationJaHigh {
         }
     }
 
-    pub fn start(&mut self, stop_convert_rx: Receiver<()>, is_continuous: bool) {
+    pub fn start(&mut self, stop_convert_rx: Receiver<()>, use_no_vosk_queue_terminate_mode: bool) {
         while Self::convert(self).is_ok() {
-            if is_continuous {
+            if use_no_vosk_queue_terminate_mode {
                 let vosk_speech = self.sqlite.select_vosk(self.note_id);
                 if vosk_speech.is_err() {
                     self.app_handle

From bfd2fe180ccd0743aa48117f62631bb534e9c19e Mon Sep 17 00:00:00 2001
From: Kodai Aoyama
Date: Sat, 9 Nov 2024 22:29:21 +0900
Subject: [PATCH 2/3] Add hybrid transcription support

- Add the transcription_hybrid module and its related files to support
  hybrid transcription using Lycoris.
- Update migrations/001.sql with new columns for the hybrid
  ReazonSpeech and Whisper passes.
- Update the NoteMain component with new useEffect hooks that handle
  hybrid transcription events.
- Update the TranscriptionAccuracy component with an option for
  selecting Lycoris as the transcription accuracy.
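To make the two-pass design concrete, here is a minimal sketch of the gating rule this patch expresses in SQL: each speech row carries one completion flag per local engine, and the online merge step only picks up rows where both flags are set. The struct and helper below are hypothetical, named after the new columns; the real check is the WHERE clause in select_pre_transcript_with_hybrid further down.

```rust
// Hypothetical mirror of the SQL gating in select_pre_transcript_with_hybrid:
// WHERE is_done_with_hybrid_whisper = 1 AND is_done_with_hybrid_reazonspeech = 1
struct HybridFlags {
    is_done_with_hybrid_reazonspeech: bool,
    is_done_with_hybrid_whisper: bool,
}

fn ready_to_merge(flags: &HybridFlags) -> bool {
    // A segment is handed to the GPT-4o merger only after both
    // local passes have stored their candidate transcripts.
    flags.is_done_with_hybrid_reazonspeech && flags.is_done_with_hybrid_whisper
}
```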
--- src-tauri/migrations/001.sql | 4 + src-tauri/src/main.rs | 7 +- src-tauri/src/module/mod.rs | 6 +- src-tauri/src/module/sqlite.rs | 121 ++++++++ src-tauri/src/module/transcription_hybrid.rs | 75 +++++ .../src/module/transcription_hybrid_online.rs | 270 ++++++++++++++++++ .../transcription_hybrid_reazonspeech.rs | 183 ++++++++++++ .../module/transcription_hybrid_whisper.rs | 210 ++++++++++++++ .../molecules/TranscriptionAccuracy.tsx | 25 ++ src/components/organisms/NoteMain.tsx | 24 ++ 10 files changed, 923 insertions(+), 2 deletions(-) create mode 100644 src-tauri/src/module/transcription_hybrid.rs create mode 100644 src-tauri/src/module/transcription_hybrid_online.rs create mode 100644 src-tauri/src/module/transcription_hybrid_reazonspeech.rs create mode 100644 src-tauri/src/module/transcription_hybrid_whisper.rs diff --git a/src-tauri/migrations/001.sql b/src-tauri/migrations/001.sql index b685438..762bf1e 100644 --- a/src-tauri/migrations/001.sql +++ b/src-tauri/migrations/001.sql @@ -13,6 +13,10 @@ CREATE TABLE speeches ( created_at_unixtime INTEGER DEFAULT (CAST(strftime('%s', 'now') AS INTEGER)), content TEXT, content_2 TEXT, + is_done_with_hybrid_reazonspeech INTEGER DEFAULT 0, + is_done_with_hybrid_whisper INTEGER DEFAULT 0, + hybrid_reazonspeech_content TEXT, + hybrid_whisper_content TEXT, wav TEXT, model TEXT, -- manual|vosk|whisper diff --git a/src-tauri/src/main.rs b/src-tauri/src/main.rs index bdee3ce..28c4c8e 100644 --- a/src-tauri/src/main.rs +++ b/src-tauri/src/main.rs @@ -4,7 +4,8 @@ )] use tauri::{ - http::{HttpRange, ResponseBuilder}, AppHandle, Manager, PathResolver, State, Window + http::{HttpRange, ResponseBuilder}, + AppHandle, Manager, PathResolver, State, Window, }; use tauri_plugin_sql::{Migration, MigrationKind}; @@ -41,6 +42,7 @@ use module::{ synthesizer::{self, Synthesizer}, transcription::{TraceCompletion, Transcription}, transcription_amivoice::TranscriptionAmivoice, + transcription_hybrid::TranscriptionHybrid, transcription_ja::TranscriptionJa, transcription_online::TranscriptionOnline, translation_en::TranslationEn, @@ -325,6 +327,9 @@ fn start_trace_command( } else if transcription_accuracy.starts_with("reazonspeech") { let mut transcription_ja = TranscriptionJa::new(app_handle, note_id); transcription_ja.start(stop_convert_rx, true); + } else if transcription_accuracy.starts_with("hybrid-transcript") { + let mut transcription_hybrid = TranscriptionHybrid::new(app_handle, note_id); + transcription_hybrid.start(stop_convert_rx, true); } else { let mut transcription = Transcription::new( app_handle, diff --git a/src-tauri/src/module/mod.rs b/src-tauri/src/module/mod.rs index d77e0f8..41c0983 100644 --- a/src-tauri/src/module/mod.rs +++ b/src-tauri/src/module/mod.rs @@ -15,8 +15,12 @@ mod sqlite; pub mod synthesizer; mod transcriber; pub mod transcription; -pub mod transcription_ja; pub mod transcription_amivoice; +pub mod transcription_hybrid; +mod transcription_hybrid_online; +mod transcription_hybrid_reazonspeech; +mod transcription_hybrid_whisper; +pub mod transcription_ja; pub mod transcription_online; pub mod translation_en; pub mod translation_ja; diff --git a/src-tauri/src/module/sqlite.rs b/src-tauri/src/module/sqlite.rs index abc1194..c0d81d3 100644 --- a/src-tauri/src/module/sqlite.rs +++ b/src-tauri/src/module/sqlite.rs @@ -20,6 +20,13 @@ pub struct Speech { pub is_desktop: bool, } +#[derive(Debug, Clone, serde::Serialize)] +pub struct PreTranscript { + pub id: u16, + pub hybrid_whisper_content: String, + pub hybrid_reazonspeech_content: 
String, +} + #[derive(Debug, Clone, serde::Serialize)] pub struct Updated { pub id: u16, @@ -96,6 +103,92 @@ impl Sqlite { }); } + pub fn select_lateset_speeches( + &self, + note_id: u64, + max_hisotry_count: u64, + ) -> Result, rusqlite::Error> { + let mut stmt = self.conn + .prepare("SELECT id,speech_type,created_at_unixtime,content,wav,model,model_description,note_id FROM speeches WHERE model = \"whisper\" AND is_done_with_hybrid_whisper = 1 AND is_done_with_hybrid_reazonspeech = 1 AND note_id = ?1 ORDER BY created_at_unixtime DESC LIMIT ?2").unwrap(); + let results = stmt + .query_map(params![note_id, max_hisotry_count], |row| { + Ok(Speech { + id: row.get_unwrap(0), + speech_type: row.get_unwrap(1), + created_at_unixtime: row.get_unwrap(2), + content: row.get_unwrap(3), + wav: row.get_unwrap(4), + model: row.get_unwrap(5), + model_description: row.get_unwrap(6), + note_id: row.get_unwrap(7), + is_desktop: false, + }) + }) + .unwrap() + .collect::, rusqlite::Error>>(); + results + } + + pub fn select_no_proccessed_with_hybrid_reazonspeech( + &self, + note_id: u64, + ) -> Result { + return self.conn + .query_row("SELECT id,speech_type,created_at_unixtime,content,wav,model,model_description,note_id FROM speeches WHERE model = \"vosk\" AND is_done_with_hybrid_reazonspeech = 0 AND note_id = ?1 ORDER BY created_at_unixtime ASC LIMIT 1", + params![note_id], + |row| { + Ok(Speech { + id: row.get_unwrap(0), + speech_type: row.get_unwrap(1), + created_at_unixtime: row.get_unwrap(2), + content: row.get_unwrap(3), + wav: row.get_unwrap(4), + model: row.get_unwrap(5), + model_description: row.get_unwrap(6), + note_id: row.get_unwrap(7), + is_desktop: false, + }) + }); + } + + pub fn select_no_proccessed_with_hybrid_whisper( + &self, + note_id: u64, + ) -> Result { + return self.conn + .query_row("SELECT id,speech_type,created_at_unixtime,content,wav,model,model_description,note_id FROM speeches WHERE model = \"vosk\" AND is_done_with_hybrid_whisper = 0 AND note_id = ?1 ORDER BY created_at_unixtime ASC LIMIT 1", + params![note_id], + |row| { + Ok(Speech { + id: row.get_unwrap(0), + speech_type: row.get_unwrap(1), + created_at_unixtime: row.get_unwrap(2), + content: row.get_unwrap(3), + wav: row.get_unwrap(4), + model: row.get_unwrap(5), + model_description: row.get_unwrap(6), + note_id: row.get_unwrap(7), + is_desktop: false, + }) + }); + } + + pub fn select_pre_transcript_with_hybrid( + &self, + note_id: u64, + ) -> Result { + return self.conn + .query_row("SELECT id, hybrid_whisper_content, hybrid_reazonspeech_content FROM speeches WHERE model = \"vosk\" AND is_done_with_hybrid_whisper = 1 AND is_done_with_hybrid_reazonspeech = 1 AND note_id = ?1 ORDER BY created_at_unixtime ASC LIMIT 1", + params![note_id], + |row| { + Ok(PreTranscript { + id: row.get_unwrap(0), + hybrid_whisper_content: row.get_unwrap(1), + hybrid_reazonspeech_content: row.get_unwrap(2), + }) + }); + } + pub fn select_whisper_token(&self) -> Result { return self.conn.query_row( "SELECT setting_status FROM settings WHERE setting_name = \"settingKeyOpenai\"", @@ -287,6 +380,34 @@ impl Sqlite { } } + pub fn update_hybrid_reazonspeech_content( + &self, + id: u16, + content: String, + ) -> Result { + match self.conn.execute( + "UPDATE speeches SET is_done_with_hybrid_reazonspeech = 1, hybrid_reazonspeech_content = ?1 WHERE id = ?2", + params![content, id], + ) { + Ok(_) => Ok(Updated { id, content }), + Err(err) => Err(err), + } + } + + pub fn update_hybrid_whisper_content( + &self, + id: u16, + content: String, + ) -> Result { + 
match self.conn.execute( + "UPDATE speeches SET is_done_with_hybrid_whisper = 1, hybrid_whisper_content = ?1 WHERE id = ?2", + params![content, id], + ) { + Ok(_) => Ok(Updated { id, content }), + Err(err) => Err(err), + } + } + pub fn update_model_is_downloaded( &self, model_name: String, diff --git a/src-tauri/src/module/transcription_hybrid.rs b/src-tauri/src/module/transcription_hybrid.rs new file mode 100644 index 0000000..bf25cee --- /dev/null +++ b/src-tauri/src/module/transcription_hybrid.rs @@ -0,0 +1,75 @@ +use super::{transcription_hybrid_online, transcription_hybrid_reazonspeech, transcription_hybrid_whisper}; + +use crossbeam_channel::Receiver; +use std::{sync::Mutex, thread}; +use tauri::AppHandle; + +#[derive(Debug, Clone, serde::Serialize)] +pub struct TraceCompletion {} + +pub struct TranscriptionHybrid { + app_handle: AppHandle, + note_id: u64, +} + +impl TranscriptionHybrid { + pub fn new(app_handle: AppHandle, note_id: u64) -> Self { + TranscriptionHybrid { + app_handle, + note_id, + } + } + + pub fn start(&mut self, stop_convert_rx: Receiver<()>, use_no_vosk_queue_terminate_mode: bool) { + let note_id = self.note_id; + + let app_handle_clone_for_reazonspeech = self.app_handle.clone(); + let app_handle_clone_for_whisper = self.app_handle.clone(); + + let stop_convert_rx_clone_for_reazonspeech = stop_convert_rx.clone(); + let stop_convert_rx_clone_for_whisper = stop_convert_rx.clone(); + + thread::spawn(move || { + transcription_hybrid_reazonspeech::initialize_transcription_hybrid_reazonspeech( + app_handle_clone_for_reazonspeech, + note_id, + ); + let mut lock = transcription_hybrid_reazonspeech::SINGLETON_INSTANCE + .lock() + .unwrap(); + if let Some(singleton) = lock.as_mut() { + singleton.start(stop_convert_rx_clone_for_reazonspeech, use_no_vosk_queue_terminate_mode); + } + }); + + thread::spawn(move || { + transcription_hybrid_whisper::initialize_transcription_hybrid_whisper( + app_handle_clone_for_whisper, + note_id, + ); + let mut lock = transcription_hybrid_whisper::SINGLETON_INSTANCE + .lock() + .unwrap(); + if let Some(singleton) = lock.as_mut() { + singleton.start(stop_convert_rx_clone_for_whisper, use_no_vosk_queue_terminate_mode); + } + }); + } +} + +pub static SINGLETON_INSTANCE: Mutex> = Mutex::new(None); + +pub fn initialize_transcription_hybrid(app_handle: AppHandle, note_id: u64) { + let mut singleton = SINGLETON_INSTANCE.lock().unwrap(); + if singleton.is_none() { + *singleton = Some(TranscriptionHybrid::new(app_handle, note_id)); + } +} + +pub fn drop_transcription_hybrid() { + let mut singleton = SINGLETON_INSTANCE.lock().unwrap(); + *singleton = None; + transcription_hybrid_reazonspeech::drop_transcription_hybrid_reazonspeech(); + transcription_hybrid_whisper::drop_transcription_hybrid_whisper(); + transcription_hybrid_online::drop_transcription_hybrid_online(); +} diff --git a/src-tauri/src/module/transcription_hybrid_online.rs b/src-tauri/src/module/transcription_hybrid_online.rs new file mode 100644 index 0000000..c9ed476 --- /dev/null +++ b/src-tauri/src/module/transcription_hybrid_online.rs @@ -0,0 +1,270 @@ +use tokio::runtime::Runtime; + +use super::sqlite::{Speech, Sqlite}; + +use crossbeam_channel::Receiver; + +use reqwest::{ + header::{HeaderMap, HeaderValue, AUTHORIZATION, CONTENT_TYPE}, + Client, +}; +use serde_json::{json, to_string, Value}; +use std::sync::Mutex; +use tauri::{AppHandle, Manager}; + +#[derive(Debug, Clone, serde::Serialize)] +pub struct TraceCompletion {} + +pub struct TranscriptionHybridOnline { + runtime: Runtime, + 
app_handle: AppHandle, + sqlite: Sqlite, + note_id: u64, + token: String, +} + +impl TranscriptionHybridOnline { + pub fn new(app_handle: AppHandle, note_id: u64) -> Self { + let runtime = Runtime::new().expect("Failed to create Tokio runtime"); + let sqlite = Sqlite::new(); + let token = sqlite.select_whisper_token().unwrap(); + Self { + runtime, + app_handle, + sqlite, + note_id, + token, + } + } + + pub fn start(&mut self, stop_convert_rx: Receiver<()>, use_no_vosk_queue_terminate_mode: bool) { + while Self::convert(self).is_ok() { + if use_no_vosk_queue_terminate_mode { + let vosk_speech = self.sqlite.select_pre_transcript_with_hybrid(self.note_id); + if vosk_speech.is_err() { + self.app_handle + .clone() + .emit_all("traceCompletion", TraceCompletion {}) + .unwrap(); + break; + } + } + if stop_convert_rx.try_recv().is_ok() { + let vosk_speech = self.sqlite.select_pre_transcript_with_hybrid(self.note_id); + if vosk_speech.is_err() { + self.app_handle + .clone() + .emit_all("traceCompletion", TraceCompletion {}) + .unwrap(); + } else { + self.app_handle + .clone() + .emit_all("traceUnCompletion", TraceCompletion {}) + .unwrap(); + } + break; + } + } + } + + fn create_json( + hybrid_whisper_content: String, + hybrid_reazonspeech_content: String, + history: Vec, + ) -> Value { + let mut json_data = json!({ + "transcriptions": { + "reazonspeech": hybrid_reazonspeech_content, + "whisper": hybrid_whisper_content, + } + }); + + if !history.is_empty() { + json_data["contextual_references"] = json!({ + "history": + history.into_iter().map(|entry| { + json!({ + "timestamp": entry.created_at_unixtime, + "content": entry.content, + }) + }).collect::>() + }); + } + + json_data + } + + async fn request( + hybrid_whisper_content: String, + hybrid_reazonspeech_content: String, + token: String, + latest_speeches: Vec, + ) -> Result> { + let url = "https://api.openai.com/v1/chat/completions"; + let temperature = 0.2; + + let client = Client::new(); + + let mut headers = HeaderMap::new(); + headers.insert( + AUTHORIZATION, + HeaderValue::from_str(&format!("Bearer {}", token))?, + ); + headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json")); + + let mut messages: Vec = Vec::new(); + let system_prompt = "あなたの役割は、ReazonSpeechとWhisperの出力を統合して「正確で読みやすい文字起こし」を作成することです。 + +### 目的 +- **正確性の確保**:発話内容を正確に伝えます。 +- **読みやすさの向上**:読み手に理解しやすい文章に仕上げます。 + +### 注意点 +- **誤認識の修正を最優先**:文脈や一般的な知識に基づいて、誤った表現を正しく修正してください。特に専門用語や固有名詞に注意してください。 +- **ハルシネーションの除去**:実際の発話に含まれていないフレーズや、関係のない内容(例:「ご視聴ありがとうございました」など)を削除してください。 +- **文脈と推測の活用**:文脈から適切な言葉を推測し、誤認識を修正してください。 +- **用語の一貫性**:同じ用語や表現は一貫して使用してください。 +- **自然な日本語表現**:文法的に正しく、自然な日本語になるように修正してください。 + +### 統合手順 + +1. **テキストの確定** + - ReazonSpeechの出力をベースに発話内容を確定します。 + - 必要に応じてWhisperの出力や文脈を参考に、内容を補完します。 + +2. **誤認識の修正** + - 文脈や一般知識を活用し、誤った表現を正しく修正します。特に専門用語や固有名詞に注意してください。 + +3. **句読点の挿入** + - 読みやすさを向上させるために、適切な位置に句読点を挿入します。 + +4. **表現の調整** + - 冗長な表現を避け、自然で簡潔な文章に整えます。 + +5. 
**最終チェック** + - 全体を見直し、一貫性と正確さ、自然な流れを確認します。 + +出力は以下の形式で返してください: + +```json +{ + \"integrated_transcription\": \"統合された文字起こし結果をここに挿入\" +} +```".to_string(); + + messages.push(json!({ + "role": "system", + "content": system_prompt + })); + + messages.push(json!({ + "role": "user", + "content": to_string(&Self::create_json(hybrid_whisper_content, hybrid_reazonspeech_content, latest_speeches)).unwrap() + })); + + // for debugging + println!("messages: {:?}", messages); + + let response_format = json!({ + "type": "json_schema", + "json_schema": { + "name": "generate_integrated_transcription", + "description": "ReazonSpeechとWhisperの出力を統合し、自然で正確な統合文字起こし結果を生成します。", + "strict": true, + "schema": { + "type": "object", + "properties": { + "integrated_transcription": { + "type": "string", + "description": "統合された文字起こしの結果。読みやすさと正確さを重視して生成されたものです。" + } + }, + "required": ["integrated_transcription"], + "additionalProperties": false, + } + } + }); + + let post_body = json!({ + "model": "gpt-4o-2024-08-06", + "temperature": temperature, + "messages": messages, + "response_format": response_format + }); + + let response = client + .post(url) + .headers(headers) + .json(&post_body) + .send() + .await?; + + let status = response.status(); + let json_response: Value = response.json().await?; + + let response_text = if status == 200 { + let content_str = json_response["choices"][0]["message"]["content"] + .as_str() + .unwrap_or("choices[0].message.content field not found"); + + if let Ok(content_json) = serde_json::from_str::(content_str) { + content_json["integrated_transcription"] + .as_str() + .unwrap_or("integrated_transcription field not found") + .to_string() + } else { + "Failed to parse content as JSON".to_string() + } + } else { + json_response.to_string() + }; + + Ok(response_text) + } + + fn convert(&mut self) -> Result<(), rusqlite::Error> { + let pre_transcript = self.sqlite.select_pre_transcript_with_hybrid(self.note_id); + let speeches = self + .sqlite + .select_lateset_speeches(self.note_id, 7) + .unwrap_or(Vec::new()); + + return pre_transcript.and_then(|p| { + let whisper = p.hybrid_whisper_content; + let reazonspeech = p.hybrid_reazonspeech_content; + + self.runtime.block_on(async { + let result = + Self::request(whisper, reazonspeech, self.token.clone(), speeches).await; + if result.is_ok() { + let updated = self + .sqlite + .update_model_vosk_to_whisper(p.id, result.unwrap()); + + self.app_handle + .clone() + .emit_all("finalTextConverted", updated.unwrap()) + .unwrap(); + } else { + println!("whisper api is temporally failed, so skipping...") + } + }); + + Ok(()) + }); + } +} + +pub static SINGLETON_INSTANCE: Mutex> = Mutex::new(None); + +pub fn initialize_transcription_hybrid_online(app_handle: AppHandle, note_id: u64) { + let mut singleton = SINGLETON_INSTANCE.lock().unwrap(); + if singleton.is_none() { + *singleton = Some(TranscriptionHybridOnline::new(app_handle, note_id)); + } +} + +pub fn drop_transcription_hybrid_online() { + let mut singleton = SINGLETON_INSTANCE.lock().unwrap(); + *singleton = None; +} diff --git a/src-tauri/src/module/transcription_hybrid_reazonspeech.rs b/src-tauri/src/module/transcription_hybrid_reazonspeech.rs new file mode 100644 index 0000000..a7b5c73 --- /dev/null +++ b/src-tauri/src/module/transcription_hybrid_reazonspeech.rs @@ -0,0 +1,183 @@ +use super::{sqlite::Sqlite, transcription_hybrid_online}; + +use crossbeam_channel::Receiver; +use hound::SampleFormat; +use sherpa_rs::zipformer::ZipFormer; +use std::sync::Mutex; +use tauri::{AppHandle, Manager}; + 
+#[derive(Debug, Clone, serde::Serialize)] +pub struct TraceCompletion {} + +pub struct TranscriptionHybridReazonspeech { + app_handle: AppHandle, + sqlite: Sqlite, + model: ZipFormer, + note_id: u64, +} + +impl TranscriptionHybridReazonspeech { + pub fn new(app_handle: AppHandle, note_id: u64) -> Self { + let model_path = app_handle + .path_resolver() + .resolve_resource(format!("resources/reazonspeech")) + .unwrap() + .to_string_lossy() + .to_string(); + let config = sherpa_rs::zipformer::ZipFormerConfig { + encoder: format!("{}/encoder-epoch-99-avg-1.onnx", model_path).into(), + decoder: format!("{}/decoder-epoch-99-avg-1.onnx", model_path).into(), + joiner: format!("{}/joiner-epoch-99-avg-1.onnx", model_path).into(), + tokens: format!("{}/tokens.txt", model_path).into(), + provider: Some("cpu".to_string()), // 指定しない(CoreML)とエラーになる + ..Default::default() + }; + + TranscriptionHybridReazonspeech { + app_handle, + sqlite: Sqlite::new(), + model: ZipFormer::new(config).unwrap(), + note_id, + } + } + + pub fn start(&mut self, stop_convert_rx: Receiver<()>, use_no_vosk_queue_terminate_mode: bool) { + while Self::convert( + self, + stop_convert_rx.clone(), + use_no_vosk_queue_terminate_mode, + ) + .is_ok() + { + if use_no_vosk_queue_terminate_mode { + transcription_hybrid_online::initialize_transcription_hybrid_online( + self.app_handle.clone(), + self.note_id, + ); + let mut lock = transcription_hybrid_online::SINGLETON_INSTANCE + .lock() + .unwrap(); + if let Some(singleton) = lock.as_mut() { + singleton.start(stop_convert_rx.clone(), use_no_vosk_queue_terminate_mode); + } + + let vosk_speech = self + .sqlite + .select_no_proccessed_with_hybrid_reazonspeech(self.note_id); + if vosk_speech.is_err() { + break; + } + } + if stop_convert_rx.try_recv().is_ok() { + break; + } + } + } + + fn convert( + &mut self, + stop_convert_rx: Receiver<()>, + use_no_vosk_queue_terminate_mode: bool, + ) -> Result<(), rusqlite::Error> { + let vosk_speech = self + .sqlite + .select_no_proccessed_with_hybrid_reazonspeech(self.note_id); + + return vosk_speech.and_then(|speech| { + let mut reader = hound::WavReader::open(speech.wav).unwrap(); + + let spec = reader.spec(); + let sample_rate = spec.sample_rate; + let is_too_short = (reader.duration() / sample_rate as u32) < 1; + + if is_too_short { + println!("input is too short, so skipping..."); + let mut updated = self + .sqlite + .update_model_vosk_to_whisper(speech.id, "".to_string()) + .unwrap(); + updated.content = speech.content; + self.app_handle + .clone() + .emit_all("finalTextConverted", updated) + .unwrap(); + return Ok(()); + } + // 必要なゼロパディングの秒数を定義 + let padding_seconds = 0.5; // 前後に0.5秒ずつ追加 + let padding_samples = (padding_seconds * sample_rate as f32) as usize; + let total_samples = + (spec.channels as usize) * (reader.duration() as usize) + (2 * padding_samples); + + let mut data = Vec::with_capacity(total_samples); + + // 前方のゼロパディングを追加 + data.extend(vec![0.0; padding_samples]); + + // 音声データの読み込み + match (spec.bits_per_sample, spec.sample_format) { + (16, SampleFormat::Int) => { + for sample in reader.samples::() { + data.push((sample.unwrap() as f32) / (i16::MAX as f32)); + } + } + (24, SampleFormat::Int) => { + for sample in reader.samples::() { + let val = (sample.unwrap() as f32) / (0x00FF_FFFF as f32); + data.push(val); + } + } + (32, SampleFormat::Int) => { + for sample in reader.samples::() { + data.push((sample.unwrap() as f32) / (i32::MAX as f32)); + } + } + (32, SampleFormat::Float) => { + for sample in reader.samples::() { + 
data.push(sample.unwrap()); + } + } + _ => panic!( + "Tried to read file but there was a problem: {:?}", + hound::Error::Unsupported + ), + } + + // 後方のゼロパディングを追加 + data.extend(vec![0.0; padding_samples]); + + let text = self.model.decode(sample_rate, data); + + let _updated = self + .sqlite + .update_hybrid_reazonspeech_content(speech.id, text); + + transcription_hybrid_online::initialize_transcription_hybrid_online( + self.app_handle.clone(), + self.note_id, + ); + let mut lock = transcription_hybrid_online::SINGLETON_INSTANCE + .lock() + .unwrap(); + if let Some(singleton) = lock.as_mut() { + singleton.start(stop_convert_rx, use_no_vosk_queue_terminate_mode); + } + + Ok(()) + }); + } +} + +pub static SINGLETON_INSTANCE: Mutex> = Mutex::new(None); + +pub fn initialize_transcription_hybrid_reazonspeech(app_handle: AppHandle, note_id: u64) { + let mut singleton = SINGLETON_INSTANCE.lock().unwrap(); + if singleton.is_none() { + *singleton = Some(TranscriptionHybridReazonspeech::new(app_handle, note_id)); + } +} + +pub fn drop_transcription_hybrid_reazonspeech() { + let mut singleton = SINGLETON_INSTANCE.lock().unwrap(); + *singleton = None; +} diff --git a/src-tauri/src/module/transcription_hybrid_whisper.rs b/src-tauri/src/module/transcription_hybrid_whisper.rs new file mode 100644 index 0000000..865c715 --- /dev/null +++ b/src-tauri/src/module/transcription_hybrid_whisper.rs @@ -0,0 +1,210 @@ +use crate::module::transcription_hybrid_online; + +use super::{sqlite::Sqlite, transcriber::Transcriber}; + +use crossbeam_channel::Receiver; +use hound::SampleFormat; +use samplerate_rs::{convert, ConverterType}; +use std::sync::Mutex; +use tauri::{AppHandle, Manager}; +use whisper_rs::WhisperContext; + +#[derive(Debug, Clone, serde::Serialize)] +pub struct TraceCompletion {} + +pub struct TranscriptionHybridWhisper { + app_handle: AppHandle, + sqlite: Sqlite, + ctx: WhisperContext, + note_id: u64, +} + +impl TranscriptionHybridWhisper { + pub fn new(app_handle: AppHandle, note_id: u64) -> Self { + let app_handle_clone = app_handle.clone(); + + TranscriptionHybridWhisper { + app_handle, + sqlite: Sqlite::new(), + ctx: Transcriber::build(app_handle_clone, "large".to_string()), + note_id, + } + } + + pub fn start(&mut self, stop_convert_rx: Receiver<()>, use_no_vosk_queue_terminate_mode: bool) { + while Self::convert( + self, + stop_convert_rx.clone(), + use_no_vosk_queue_terminate_mode, + ) + .is_ok() + { + if use_no_vosk_queue_terminate_mode { + transcription_hybrid_online::initialize_transcription_hybrid_online( + self.app_handle.clone(), + self.note_id, + ); + let mut lock = transcription_hybrid_online::SINGLETON_INSTANCE + .lock() + .unwrap(); + if let Some(singleton) = lock.as_mut() { + singleton.start(stop_convert_rx.clone(), use_no_vosk_queue_terminate_mode); + } + + let vosk_speech = self + .sqlite + .select_no_proccessed_with_hybrid_whisper(self.note_id); + if vosk_speech.is_err() { + break; + } + } + if stop_convert_rx.try_recv().is_ok() { + break; + } + } + } + + fn convert( + &mut self, + stop_convert_rx: Receiver<()>, + use_no_vosk_queue_terminate_mode: bool, + ) -> Result<(), rusqlite::Error> { + let vosk_speech = self + .sqlite + .select_no_proccessed_with_hybrid_whisper(self.note_id); + + return vosk_speech.and_then(|speech| { + let mut reader = hound::WavReader::open(speech.wav).unwrap(); + + let spec = reader.spec(); + if (reader.duration() / spec.sample_rate as u32) < 1 { + println!("input is too short, so skipping..."); + let mut updated = self + .sqlite + 
.update_model_vosk_to_whisper(speech.id, "".to_string()) + .unwrap(); + updated.content = speech.content; + self.app_handle + .clone() + .emit_all("finalTextConverted", updated) + .unwrap(); + return Ok(()); + } + + let mut data = + Vec::with_capacity((spec.channels as usize) * (reader.duration() as usize)); + match (spec.bits_per_sample, spec.sample_format) { + (16, SampleFormat::Int) => { + for sample in reader.samples::() { + data.push((sample.unwrap() as f32) / (0x7fffi32 as f32)); + } + } + (24, SampleFormat::Int) => { + for sample in reader.samples::() { + let val = (sample.unwrap() as f32) / (0x00ff_ffffi32 as f32); + data.push(val); + } + } + (32, SampleFormat::Int) => { + for sample in reader.samples::() { + data.push((sample.unwrap() as f32) / (0x7fff_ffffi32 as f32)); + } + } + (32, SampleFormat::Float) => { + for sample in reader.samples::() { + data.push(sample.unwrap()); + } + } + _ => panic!( + "Tried to read file but there was a problem: {:?}", + hound::Error::Unsupported + ), + } + let data = if spec.channels != 1 { + whisper_rs::convert_stereo_to_mono_audio(&data).unwrap() + } else { + data + }; + let audio_data = convert( + spec.sample_rate, + 16000, + 1, + ConverterType::SincBestQuality, + &data, + ) + .unwrap(); + + let mut state = self.ctx.create_state().expect("failed to create state"); + let result = state.full( + Transcriber::build_params("ja".to_string(), "large".to_string()), + &audio_data[..], + ); + if result.is_ok() { + let num_segments = state + .full_n_segments() + .expect("failed to get number of segments"); + let mut converted: Vec = vec!["".to_string()]; + for i in 0..num_segments { + let segment = state.full_get_segment_text(i); + if segment.is_ok() { + converted.push(segment.unwrap().to_string()); + }; + } + + let result = converted.join(""); + if result.is_empty() { + println!("Whisper returned empty content, falling back to Vosk content"); + let mut updated = self + .sqlite + .update_model_vosk_to_whisper(speech.id, result) + .unwrap(); + updated.content = speech.content; + self.app_handle + .clone() + .emit_all("finalTextConverted", updated) + .unwrap(); + } else { + let _updated = self.sqlite.update_hybrid_whisper_content(speech.id, result); + + transcription_hybrid_online::initialize_transcription_hybrid_online( + self.app_handle.clone(), + self.note_id, + ); + let mut lock = transcription_hybrid_online::SINGLETON_INSTANCE + .lock() + .unwrap(); + if let Some(singleton) = lock.as_mut() { + singleton.start(stop_convert_rx, use_no_vosk_queue_terminate_mode); + } + } + } else { + println!("whisper is temporally failed, so skipping..."); + let mut updated = self + .sqlite + .update_model_vosk_to_whisper(speech.id, "".to_string()) + .unwrap(); + updated.content = speech.content; + self.app_handle + .clone() + .emit_all("finalTextConverted", updated) + .unwrap(); + } + + Ok(()) + }); + } +} + +pub static SINGLETON_INSTANCE: Mutex> = Mutex::new(None); + +pub fn initialize_transcription_hybrid_whisper(app_handle: AppHandle, note_id: u64) { + let mut singleton = SINGLETON_INSTANCE.lock().unwrap(); + if singleton.is_none() { + *singleton = Some(TranscriptionHybridWhisper::new(app_handle, note_id)); + } +} + +pub fn drop_transcription_hybrid_whisper() { + let mut singleton = SINGLETON_INSTANCE.lock().unwrap(); + *singleton = None; +} diff --git a/src/components/molecules/TranscriptionAccuracy.tsx b/src/components/molecules/TranscriptionAccuracy.tsx index da3cf31..2a4af96 100644 --- a/src/components/molecules/TranscriptionAccuracy.tsx +++ 
b/src/components/molecules/TranscriptionAccuracy.tsx
@@ -68,6 +68,8 @@ const TranscriptionAccuracy = (): JSX.Element => {
         return "AmiVoice";
       case "online-chat":
         return "ChatGPT";
+      case "hybrid-transcript":
+        return "Lycoris";
       case "small-translate-to-en":
         return "汎用パック(低精度)";
       case "medium-translate-to-en":
@@ -140,6 +142,29 @@ const TranscriptionAccuracy = (): JSX.Element => {
                         オフ
+                {settingKeyOpenai
+                    && downloadedModels.includes("large")
+                    && downloadedModelsReazonSpeech.length > 0
+                    && (speakerLanguage?.startsWith("ja") || speakerLanguage?.startsWith("small-ja")) &&
+                    <>
                        [JSX markup lost in extraction: the added block renders a
                        「ハイブリッド」 section heading and a radio option labeled
                        「文字起こし」 that selects the "hybrid-transcript" accuracy
                        (displayed as "Lycoris"), followed by two 「•」 caption lines
                        whose text did not survive]
+                    </>}
                 {(downloadedModels.length > 0 || (downloadedModelsReazonSpeech.length > 0 && (speakerLanguage?.startsWith("ja") || speakerLanguage?.startsWith("small-ja")))) &&
                     <>
diff --git a/src/components/organisms/NoteMain.tsx b/src/components/organisms/NoteMain.tsx
index caba556..0c174a0 100644
--- a/src/components/organisms/NoteMain.tsx
+++ b/src/components/organisms/NoteMain.tsx
@@ -60,6 +60,7 @@ const NoteMain = (): JSX.Element => {
     showGotoBottomButton();
   }, []);
   const [isReadyToRecognize, setIsReadyToRecognize] = useState(false);
+
   useEffect(() => {
     const scrollContainer = scrollContainerRef.current;
     if (scrollContainer) {
@@ -68,6 +69,7 @@ const NoteMain = (): JSX.Element => {
       return () => scrollContainer.removeEventListener('scroll', scroll);
     }
   }, [selectedNote]);
+
   useEffect(() => {
     if (recordingNote === selectedNote!.note_id) {
       const rect = bottomRef.current?.getBoundingClientRect();
@@ -79,6 +81,7 @@ const NoteMain = (): JSX.Element => {
       }
     }
   }, [histories, recordingNote]);
+
   useEffect(() => {
     setPartialText(null)
     setPartialTextDesktop(null)
@@ -96,6 +99,7 @@ const NoteMain = (): JSX.Element => {
       unlistenPartialText.then(f => f());
     }
   }, [selectedNote, recordingNote])
+
   useEffect(() => {
     const unlistenFinalText = listen('finalTextRecognized', event => {
       const { is_desktop, ...current } = event.payload as SpeechHistoryType & { is_desktop: boolean }
@@ -185,6 +189,26 @@ const NoteMain = (): JSX.Element => {
     }
   }, [isRecording])
 
+  useEffect(() => {
+    const unlisten = listen('traceCompletion', () => {
+      setPartialText(null);
+      setPartialTextDesktop(null);
+    })
+    return () => {
+      unlisten.then(f => f());
+    }
+  }, [])
+
+  useEffect(() => {
+    const unlisten = listen('traceUnCompletion', () => {
+      setPartialText(null);
+      setPartialTextDesktop(null);
+    })
+    return () => {
+      unlisten.then(f => f());
+    }
+  }, [])
+
   return (<>
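For reference, the request body that transcription_hybrid_online.rs sends to the Chat Completions endpoint can be sketched as follows. The field names come from create_json in the diff above; the function signature and the history tuple type are simplifications for illustration, not the exact code.

```rust
use serde_json::{json, Value};

// Simplified sketch of create_json from transcription_hybrid_online.rs:
// both candidate transcripts, plus up to seven recent speeches attached
// as contextual references for the merge prompt.
fn merge_request_body(
    whisper: &str,
    reazonspeech: &str,
    history: &[(i64, String)], // (created_at_unixtime, content), a simplification
) -> Value {
    let mut body = json!({
        "transcriptions": {
            "reazonspeech": reazonspeech,
            "whisper": whisper,
        }
    });
    if !history.is_empty() {
        body["contextual_references"] = json!({
            "history": history
                .iter()
                .map(|(ts, content)| json!({ "timestamp": ts, "content": content }))
                .collect::<Vec<Value>>(),
        });
    }
    body
}

fn main() {
    let body = merge_request_body(
        "whisper candidate",
        "reazonspeech candidate",
        &[(1_731_140_000, "直前の発話".to_string())],
    );
    println!("{body}");
}
```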
From cf531c1eb4fc79e81318f7111116e94bedf3a3fa Mon Sep 17 00:00:00 2001
From: Kodai Aoyama
Date: Sat, 9 Nov 2024 22:55:41 +0900
Subject: [PATCH 3/3] refactor: add hybrid status index to speeches table

Add a new index, idx_hybrid_status, on the speeches table covering the
is_done_with_hybrid_reazonspeech and is_done_with_hybrid_whisper
columns, to speed up the hybrid transcription queries that poll on
those flags. Also drop the now-unused TraceCompletion structs from the
hybrid submodules, reformat transcription_hybrid.rs, and consolidate
NoteMain's two trace event listeners into a single useEffect.
---
 src-tauri/migrations/001.sql                  |  6 +++++-
 src-tauri/src/module/transcription_hybrid.rs  | 17 +++++++++------
 .../transcription_hybrid_reazonspeech.rs      |  3 ---
 .../module/transcription_hybrid_whisper.rs    |  3 ---
 src/components/organisms/NoteMain.tsx         | 21 +++++++------------
 5 files changed, 24 insertions(+), 26 deletions(-)

diff --git a/src-tauri/migrations/001.sql b/src-tauri/migrations/001.sql
index 762bf1e..01ef185 100644
--- a/src-tauri/migrations/001.sql
+++ b/src-tauri/migrations/001.sql
@@ -182,4 +182,8 @@ VALUES("jvnv-F2-jp", "style-bert-vits2-voice");
 INSERT INTO models(model_name, model_type)
 VALUES("jvnv-M1-jp", "style-bert-vits2-voice");
 INSERT INTO models(model_name, model_type)
-VALUES("jvnv-M2-jp", "style-bert-vits2-voice");
\ No newline at end of file
+VALUES("jvnv-M2-jp", "style-bert-vits2-voice");
+CREATE INDEX idx_hybrid_status ON speeches(
+    is_done_with_hybrid_reazonspeech,
+    is_done_with_hybrid_whisper
+);
\ No newline at end of file
diff --git a/src-tauri/src/module/transcription_hybrid.rs b/src-tauri/src/module/transcription_hybrid.rs
index bf25cee..d894f4b 100644
--- a/src-tauri/src/module/transcription_hybrid.rs
+++ b/src-tauri/src/module/transcription_hybrid.rs
@@ -1,12 +1,11 @@
-use super::{transcription_hybrid_online, transcription_hybrid_reazonspeech, transcription_hybrid_whisper};
+use super::{
+    transcription_hybrid_online, transcription_hybrid_reazonspeech, transcription_hybrid_whisper,
+};
 
 use crossbeam_channel::Receiver;
 use std::{sync::Mutex, thread};
 use tauri::AppHandle;
 
-#[derive(Debug, Clone, serde::Serialize)]
-pub struct TraceCompletion {}
-
 pub struct TranscriptionHybrid {
     app_handle: AppHandle,
     note_id: u64,
@@ -38,7 +37,10 @@ impl TranscriptionHybrid {
                 .lock()
                 .unwrap();
             if let Some(singleton) = lock.as_mut() {
-                singleton.start(stop_convert_rx_clone_for_reazonspeech, use_no_vosk_queue_terminate_mode);
+                singleton.start(
+                    stop_convert_rx_clone_for_reazonspeech,
+                    use_no_vosk_queue_terminate_mode,
+                );
             }
         });
 
@@ -51,7 +53,10 @@ impl TranscriptionHybrid {
                 .lock()
                 .unwrap();
             if let Some(singleton) = lock.as_mut() {
-                singleton.start(stop_convert_rx_clone_for_whisper, use_no_vosk_queue_terminate_mode);
+                singleton.start(
+                    stop_convert_rx_clone_for_whisper,
+                    use_no_vosk_queue_terminate_mode,
+                );
             }
         });
     }
diff --git a/src-tauri/src/module/transcription_hybrid_reazonspeech.rs b/src-tauri/src/module/transcription_hybrid_reazonspeech.rs
index a7b5c73..8a49638 100644
--- a/src-tauri/src/module/transcription_hybrid_reazonspeech.rs
+++ b/src-tauri/src/module/transcription_hybrid_reazonspeech.rs
@@ -6,9 +6,6 @@ use sherpa_rs::zipformer::ZipFormer;
 use std::sync::Mutex;
 use tauri::{AppHandle, Manager};
 
-#[derive(Debug, Clone, serde::Serialize)]
-pub struct TraceCompletion {}
-
 pub struct TranscriptionHybridReazonspeech {
     app_handle: AppHandle,
     sqlite: Sqlite,
diff --git a/src-tauri/src/module/transcription_hybrid_whisper.rs b/src-tauri/src/module/transcription_hybrid_whisper.rs
index 865c715..8ad4298 100644
--- a/src-tauri/src/module/transcription_hybrid_whisper.rs
+++ b/src-tauri/src/module/transcription_hybrid_whisper.rs
@@ -9,9 +9,6 @@ use std::sync::Mutex;
 use tauri::{AppHandle, Manager};
 use whisper_rs::WhisperContext;
 
-#[derive(Debug, Clone, serde::Serialize)]
-pub struct TraceCompletion {}
-
 pub struct TranscriptionHybridWhisper {
     app_handle: AppHandle,
     sqlite: Sqlite,
diff --git a/src/components/organisms/NoteMain.tsx b/src/components/organisms/NoteMain.tsx
index 0c174a0..fd7c160 100644
--- a/src/components/organisms/NoteMain.tsx
+++ b/src/components/organisms/NoteMain.tsx
@@ -190,24 +190,19 @@ const NoteMain = (): JSX.Element => {
   }, [isRecording])
 
   useEffect(() => {
-    const unlisten = listen('traceCompletion', () => {
+    const resetPartialTexts = () => {
       setPartialText(null);
       setPartialTextDesktop(null);
-    })
-    return () => {
-      unlisten.then(f => f());
-    }
-  }, [])
+    };
+
+    const unlistenCompletion = listen('traceCompletion', resetPartialTexts);
+    const unlistenUnCompletion = listen('traceUnCompletion', resetPartialTexts);
 
-  useEffect(() => {
-    const unlisten = listen('traceUnCompletion', () => {
-      setPartialText(null);
-      setPartialTextDesktop(null);
-    })
     return () => {
-      unlisten.then(f => f());
+      unlistenCompletion.then(f => f());
+      unlistenUnCompletion.then(f => f());
     }
-  }, [])
+  }, []);
 
   return (<>
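A note on the new index: idx_hybrid_status covers exactly the two completion flags that the per-engine pollers and the merger filter on, so SQLite can narrow its scan to unprocessed rows instead of walking every speech. A rough sketch of the query shape it serves follows; the helper function is hypothetical (modeled on select_no_proccessed_with_hybrid_reazonspeech in sqlite.rs), and whether the planner actually chooses the index also depends on the model and note_id predicates.

```rust
use rusqlite::{params, Connection};

// Hypothetical helper matching the shape of the hybrid polling queries:
// fetch the oldest speech whose ReazonSpeech pass has not run yet.
fn next_unprocessed_reazonspeech_id(conn: &Connection, note_id: u64) -> rusqlite::Result<u16> {
    conn.query_row(
        "SELECT id FROM speeches \
         WHERE model = 'vosk' AND is_done_with_hybrid_reazonspeech = 0 AND note_id = ?1 \
         ORDER BY created_at_unixtime ASC LIMIT 1",
        params![note_id],
        |row| row.get(0),
    )
}
```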