Skip to content

Commit

Permalink
Merge pull request #184 from solaoi/feature/hybrid-transcription
Browse files Browse the repository at this point in the history
Feature/hybrid transcription
  • Loading branch information
solaoi authored Nov 9, 2024
2 parents ad76d35 + cf531c1 commit a0cb5cd
Show file tree
Hide file tree
Showing 20 changed files with 963 additions and 22 deletions.
10 changes: 9 additions & 1 deletion src-tauri/migrations/001.sql
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ CREATE TABLE speeches (
created_at_unixtime INTEGER DEFAULT (CAST(strftime('%s', 'now') AS INTEGER)),
content TEXT,
content_2 TEXT,
is_done_with_hybrid_reazonspeech INTEGER DEFAULT 0,
is_done_with_hybrid_whisper INTEGER DEFAULT 0,
hybrid_reazonspeech_content TEXT,
hybrid_whisper_content TEXT,
wav TEXT,
model TEXT,
-- manual|vosk|whisper
Expand Down Expand Up @@ -178,4 +182,8 @@ VALUES("jvnv-F2-jp", "style-bert-vits2-voice");
INSERT INTO models(model_name, model_type)
VALUES("jvnv-M1-jp", "style-bert-vits2-voice");
INSERT INTO models(model_name, model_type)
VALUES("jvnv-M2-jp", "style-bert-vits2-voice");
VALUES("jvnv-M2-jp", "style-bert-vits2-voice");
-- Composite index over the two hybrid-transcription completion flags of speeches.
CREATE INDEX idx_hybrid_status ON speeches(
is_done_with_hybrid_reazonspeech,
is_done_with_hybrid_whisper
);
7 changes: 6 additions & 1 deletion src-tauri/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
)]

use tauri::{
http::{HttpRange, ResponseBuilder}, AppHandle, Manager, PathResolver, State, Window
http::{HttpRange, ResponseBuilder},
AppHandle, Manager, PathResolver, State, Window,
};
use tauri_plugin_sql::{Migration, MigrationKind};

Expand Down Expand Up @@ -41,6 +42,7 @@ use module::{
synthesizer::{self, Synthesizer},
transcription::{TraceCompletion, Transcription},
transcription_amivoice::TranscriptionAmivoice,
transcription_hybrid::TranscriptionHybrid,
transcription_ja::TranscriptionJa,
transcription_online::TranscriptionOnline,
translation_en::TranslationEn,
Expand Down Expand Up @@ -325,6 +327,9 @@ fn start_trace_command(
} else if transcription_accuracy.starts_with("reazonspeech") {
let mut transcription_ja = TranscriptionJa::new(app_handle, note_id);
transcription_ja.start(stop_convert_rx, true);
} else if transcription_accuracy.starts_with("hybrid-transcript") {
let mut transcription_hybrid = TranscriptionHybrid::new(app_handle, note_id);
transcription_hybrid.start(stop_convert_rx, true);
} else {
let mut transcription = Transcription::new(
app_handle,
Expand Down
4 changes: 2 additions & 2 deletions src-tauri/src/module/chat_online.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ impl ChatOnline {
}
}

pub fn start(&mut self, stop_convert_rx: Receiver<()>, is_continuous: bool) {
pub fn start(&mut self, stop_convert_rx: Receiver<()>, use_no_vosk_queue_terminate_mode: bool) {
while Self::convert(self).is_ok() {
if is_continuous {
if use_no_vosk_queue_terminate_mode {
let vosk_speech = self.sqlite.select_vosk(self.note_id);
if vosk_speech.is_err() {
self.app_handle
Expand Down
6 changes: 5 additions & 1 deletion src-tauri/src/module/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,12 @@ mod sqlite;
pub mod synthesizer;
mod transcriber;
pub mod transcription;
pub mod transcription_ja;
pub mod transcription_amivoice;
pub mod transcription_hybrid;
mod transcription_hybrid_online;
mod transcription_hybrid_reazonspeech;
mod transcription_hybrid_whisper;
pub mod transcription_ja;
pub mod transcription_online;
pub mod translation_en;
pub mod translation_ja;
Expand Down
16 changes: 14 additions & 2 deletions src-tauri/src/module/record.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ use tauri::{api::path::data_dir, AppHandle, Manager};

use super::{
chat_online, recognizer::MyRecognizer, sqlite::Sqlite, transcription, transcription_amivoice,
transcription_ja, transcription_online, translation_en, translation_ja, translation_ja_high,
writer::Writer,
transcription_hybrid, transcription_ja, transcription_online, translation_en, translation_ja,
translation_ja_high, writer::Writer,
};

pub struct Record {
Expand Down Expand Up @@ -258,6 +258,17 @@ impl Record {
if let Some(singleton) = lock.as_mut() {
singleton.start(stop_convert_rx_clone, false);
}
} else if transcription_accuracy_clone.starts_with("hybrid-transcript")
{
transcription_hybrid::initialize_transcription_hybrid(
app_handle_clone,
note_id,
);
let mut lock =
transcription_hybrid::SINGLETON_INSTANCE.lock().unwrap();
if let Some(singleton) = lock.as_mut() {
singleton.start(stop_convert_rx_clone, false);
}
} else {
transcription::initialize_transcription(
app_handle_clone,
Expand Down Expand Up @@ -300,6 +311,7 @@ impl Record {
translation_ja_high::drop_translation_ja_high();
transcription_online::drop_transcription_online();
transcription_amivoice::drop_transcription_amivoice();
transcription_hybrid::drop_transcription_hybrid();
chat_online::drop_chat_online();
} else {
drop(stop_convert_tx)
Expand Down
12 changes: 11 additions & 1 deletion src-tauri/src/module/record_desktop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ use screencapturekit::{
use vosk::Recognizer;

use super::{
chat_online, recognizer::MyRecognizer, sqlite::Sqlite, transcription, transcription_amivoice, transcription_ja, transcription_online, translation_en, translation_ja, translation_ja_high, writer::Writer
chat_online, recognizer::MyRecognizer, sqlite::Sqlite, transcription, transcription_amivoice, transcription_hybrid, transcription_ja, transcription_online, translation_en, translation_ja, translation_ja_high, writer::Writer
};

pub struct RecordDesktop {
Expand Down Expand Up @@ -288,6 +288,15 @@ impl RecordDesktop {
if let Some(singleton) = lock.as_mut() {
singleton.start(stop_convert_rx_clone, false);
}
} else if transcription_accuracy_clone.starts_with("hybrid-transcript") {
transcription_hybrid::initialize_transcription_hybrid(
app_handle_clone,
note_id,
);
let mut lock = transcription_hybrid::SINGLETON_INSTANCE.lock().unwrap();
if let Some(singleton) = lock.as_mut() {
singleton.start(stop_convert_rx_clone, false);
}
} else {
transcription::initialize_transcription(
app_handle_clone,
Expand Down Expand Up @@ -334,6 +343,7 @@ impl RecordDesktop {
translation_ja_high::drop_translation_ja_high();
transcription_online::drop_transcription_online();
transcription_amivoice::drop_transcription_amivoice();
transcription_hybrid::drop_transcription_hybrid();
chat_online::drop_chat_online();
} else {
drop(stop_convert_tx)
Expand Down
121 changes: 121 additions & 0 deletions src-tauri/src/module/sqlite.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,13 @@ pub struct Speech {
pub is_desktop: bool,
}

/// Pair of intermediate hybrid transcripts read from a single `speeches` row.
///
/// Produced by `Sqlite::select_pre_transcript_with_hybrid`, which selects the
/// `id`, `hybrid_whisper_content` and `hybrid_reazonspeech_content` columns.
#[derive(Debug, Clone, serde::Serialize)]
pub struct PreTranscript {
/// Value of the row's `id` column.
pub id: u16,
/// Value of the row's `hybrid_whisper_content` column.
pub hybrid_whisper_content: String,
/// Value of the row's `hybrid_reazonspeech_content` column.
pub hybrid_reazonspeech_content: String,
}

#[derive(Debug, Clone, serde::Serialize)]
pub struct Updated {
pub id: u16,
Expand Down Expand Up @@ -96,6 +103,92 @@ impl Sqlite {
});
}

/// Returns the latest finished speeches of a note, newest first.
///
/// Selects rows of `speeches` for `note_id` whose `model` is `whisper` and whose
/// `is_done_with_hybrid_whisper` and `is_done_with_hybrid_reazonspeech` flags are
/// both `1`, ordered by `created_at_unixtime` descending and limited to
/// `max_hisotry_count` rows. `is_desktop` is always set to `false` on the result.
///
/// # Errors
///
/// Returns the `rusqlite::Error` raised while preparing the statement, binding the
/// parameters, or stepping through the rows (previously the first two panicked).
///
/// # Panics
///
/// Still panics through `get_unwrap` if a selected column cannot be converted to
/// the matching `Speech` field type.
pub fn select_lateset_speeches(
    &self,
    note_id: u64,
    max_hisotry_count: u64,
) -> Result<Vec<Speech>, rusqlite::Error> {
    let mut stmt = self.conn
        .prepare("SELECT id,speech_type,created_at_unixtime,content,wav,model,model_description,note_id FROM speeches WHERE model = \"whisper\" AND is_done_with_hybrid_whisper = 1 AND is_done_with_hybrid_reazonspeech = 1 AND note_id = ?1 ORDER BY created_at_unixtime DESC LIMIT ?2")?;
    // Collected into a local first so the row iterator (which borrows `stmt`)
    // is dropped before `stmt` itself goes out of scope.
    let speeches = stmt
        .query_map(params![note_id, max_hisotry_count], |row| {
            Ok(Speech {
                id: row.get_unwrap(0),
                speech_type: row.get_unwrap(1),
                created_at_unixtime: row.get_unwrap(2),
                content: row.get_unwrap(3),
                wav: row.get_unwrap(4),
                model: row.get_unwrap(5),
                model_description: row.get_unwrap(6),
                note_id: row.get_unwrap(7),
                is_desktop: false,
            })
        })?
        .collect::<Result<Vec<_>, rusqlite::Error>>();
    speeches
}

/// Fetches the oldest `vosk` speech of `note_id` whose
/// `is_done_with_hybrid_reazonspeech` flag is still `0`.
///
/// Rows are ordered by `created_at_unixtime` ascending and only the first one is
/// returned. `is_desktop` is always set to `false` on the result.
///
/// # Errors
///
/// Returns the `rusqlite::Error` from `query_row`, which includes the case where
/// no row matches.
pub fn select_no_proccessed_with_hybrid_reazonspeech(
    &self,
    note_id: u64,
) -> Result<Speech, rusqlite::Error> {
    let sql = "SELECT id,speech_type,created_at_unixtime,content,wav,model,model_description,note_id FROM speeches WHERE model = \"vosk\" AND is_done_with_hybrid_reazonspeech = 0 AND note_id = ?1 ORDER BY created_at_unixtime ASC LIMIT 1";
    self.conn.query_row(sql, params![note_id], |row| {
        let speech = Speech {
            id: row.get_unwrap(0),
            speech_type: row.get_unwrap(1),
            created_at_unixtime: row.get_unwrap(2),
            content: row.get_unwrap(3),
            wav: row.get_unwrap(4),
            model: row.get_unwrap(5),
            model_description: row.get_unwrap(6),
            note_id: row.get_unwrap(7),
            is_desktop: false,
        };
        Ok(speech)
    })
}

/// Fetches the oldest `vosk` speech of `note_id` whose
/// `is_done_with_hybrid_whisper` flag is still `0`.
///
/// Rows are ordered by `created_at_unixtime` ascending and only the first one is
/// returned. `is_desktop` is always set to `false` on the result.
///
/// # Errors
///
/// Returns the `rusqlite::Error` from `query_row`, which includes the case where
/// no row matches.
pub fn select_no_proccessed_with_hybrid_whisper(
    &self,
    note_id: u64,
) -> Result<Speech, rusqlite::Error> {
    let sql = "SELECT id,speech_type,created_at_unixtime,content,wav,model,model_description,note_id FROM speeches WHERE model = \"vosk\" AND is_done_with_hybrid_whisper = 0 AND note_id = ?1 ORDER BY created_at_unixtime ASC LIMIT 1";
    self.conn.query_row(sql, params![note_id], |row| {
        let speech = Speech {
            id: row.get_unwrap(0),
            speech_type: row.get_unwrap(1),
            created_at_unixtime: row.get_unwrap(2),
            content: row.get_unwrap(3),
            wav: row.get_unwrap(4),
            model: row.get_unwrap(5),
            model_description: row.get_unwrap(6),
            note_id: row.get_unwrap(7),
            is_desktop: false,
        };
        Ok(speech)
    })
}

/// Fetches the oldest `vosk` speech of `note_id` for which both hybrid flags
/// (`is_done_with_hybrid_whisper` and `is_done_with_hybrid_reazonspeech`) are `1`,
/// returning its id together with the two stored hybrid transcripts.
///
/// # Errors
///
/// Returns the `rusqlite::Error` from `query_row`, which includes the case where
/// no row matches.
pub fn select_pre_transcript_with_hybrid(
    &self,
    note_id: u64,
) -> Result<PreTranscript, rusqlite::Error> {
    let sql = "SELECT id, hybrid_whisper_content, hybrid_reazonspeech_content FROM speeches WHERE model = \"vosk\" AND is_done_with_hybrid_whisper = 1 AND is_done_with_hybrid_reazonspeech = 1 AND note_id = ?1 ORDER BY created_at_unixtime ASC LIMIT 1";
    self.conn.query_row(sql, params![note_id], |row| {
        let pre_transcript = PreTranscript {
            id: row.get_unwrap(0),
            hybrid_whisper_content: row.get_unwrap(1),
            hybrid_reazonspeech_content: row.get_unwrap(2),
        };
        Ok(pre_transcript)
    })
}

pub fn select_whisper_token(&self) -> Result<String, rusqlite::Error> {
return self.conn.query_row(
"SELECT setting_status FROM settings WHERE setting_name = \"settingKeyOpenai\"",
Expand Down Expand Up @@ -287,6 +380,34 @@ impl Sqlite {
}
}

/// Stores `content` in `hybrid_reazonspeech_content` for the speech with the given
/// `id` and sets its `is_done_with_hybrid_reazonspeech` flag to `1`.
///
/// On success the `id` and `content` are handed back as an `Updated`. The number of
/// affected rows is not inspected.
///
/// # Errors
///
/// Returns the `rusqlite::Error` reported by `execute`.
pub fn update_hybrid_reazonspeech_content(
    &self,
    id: u16,
    content: String,
) -> Result<Updated, rusqlite::Error> {
    let sql = "UPDATE speeches SET is_done_with_hybrid_reazonspeech = 1, hybrid_reazonspeech_content = ?1 WHERE id = ?2";
    self.conn.execute(sql, params![content, id])?;
    Ok(Updated { id, content })
}

/// Stores `content` in `hybrid_whisper_content` for the speech with the given `id`
/// and sets its `is_done_with_hybrid_whisper` flag to `1`.
///
/// On success the `id` and `content` are handed back as an `Updated`. The number of
/// affected rows is not inspected.
///
/// # Errors
///
/// Returns the `rusqlite::Error` reported by `execute`.
pub fn update_hybrid_whisper_content(
    &self,
    id: u16,
    content: String,
) -> Result<Updated, rusqlite::Error> {
    let sql = "UPDATE speeches SET is_done_with_hybrid_whisper = 1, hybrid_whisper_content = ?1 WHERE id = ?2";
    self.conn.execute(sql, params![content, id])?;
    Ok(Updated { id, content })
}

pub fn update_model_is_downloaded(
&self,
model_name: String,
Expand Down
4 changes: 2 additions & 2 deletions src-tauri/src/module/transcription.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ impl Transcription {
}
}

pub fn start(&mut self, stop_convert_rx: Receiver<()>, is_continuous: bool) {
pub fn start(&mut self, stop_convert_rx: Receiver<()>, use_no_vosk_queue_terminate_mode: bool) {
while Self::convert(self).is_ok() {
if is_continuous {
if use_no_vosk_queue_terminate_mode {
let vosk_speech = self.sqlite.select_vosk(self.note_id);
if vosk_speech.is_err() {
self.app_handle
Expand Down
4 changes: 2 additions & 2 deletions src-tauri/src/module/transcription_amivoice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@ impl TranscriptionAmivoice {
}
}

pub fn start(&mut self, stop_convert_rx: Receiver<()>, is_continuous: bool) {
pub fn start(&mut self, stop_convert_rx: Receiver<()>, use_no_vosk_queue_terminate_mode: bool) {
while Self::convert(self).is_ok() {
if is_continuous {
if use_no_vosk_queue_terminate_mode {
let vosk_speech = self.sqlite.select_vosk(self.note_id);
if vosk_speech.is_err() {
self.app_handle
Expand Down
80 changes: 80 additions & 0 deletions src-tauri/src/module/transcription_hybrid.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
use super::{
transcription_hybrid_online, transcription_hybrid_reazonspeech, transcription_hybrid_whisper,
};

use crossbeam_channel::Receiver;
use std::{sync::Mutex, thread};
use tauri::AppHandle;

/// Entry point of the hybrid transcription mode.
///
/// Holds what `start` needs to launch the ReazonSpeech and Whisper workers.
pub struct TranscriptionHybrid {
// Cloned once per worker thread in `start`.
app_handle: AppHandle,
// Note id passed to both worker initializers.
note_id: u64,
}

impl TranscriptionHybrid {
    /// Creates a hybrid transcription handle for the given note.
    pub fn new(app_handle: AppHandle, note_id: u64) -> Self {
        Self {
            app_handle,
            note_id,
        }
    }

    /// Spawns the ReazonSpeech and Whisper workers on two separate threads.
    ///
    /// Each thread initializes its module-level singleton, locks it, and calls the
    /// worker's `start` with its own clone of `stop_convert_rx` and the unchanged
    /// `use_no_vosk_queue_terminate_mode` flag. The join handles are discarded, so
    /// this method returns as soon as both threads have been spawned.
    ///
    /// NOTE(review): both workers receive from clones of the same channel; confirm
    /// the stop signal reaches both of them (e.g. via sender disconnect).
    pub fn start(&mut self, stop_convert_rx: Receiver<()>, use_no_vosk_queue_terminate_mode: bool) {
        let note_id = self.note_id;

        // --- ReazonSpeech worker ---
        let reazonspeech_app_handle = self.app_handle.clone();
        let reazonspeech_stop_rx = stop_convert_rx.clone();
        thread::spawn(move || {
            transcription_hybrid_reazonspeech::initialize_transcription_hybrid_reazonspeech(
                reazonspeech_app_handle,
                note_id,
            );
            // The guard is held for the whole duration of the worker's `start`.
            let mut guard = transcription_hybrid_reazonspeech::SINGLETON_INSTANCE
                .lock()
                .unwrap();
            if let Some(worker) = guard.as_mut() {
                worker.start(reazonspeech_stop_rx, use_no_vosk_queue_terminate_mode);
            }
        });

        // --- Whisper worker ---
        let whisper_app_handle = self.app_handle.clone();
        let whisper_stop_rx = stop_convert_rx.clone();
        thread::spawn(move || {
            transcription_hybrid_whisper::initialize_transcription_hybrid_whisper(
                whisper_app_handle,
                note_id,
            );
            // The guard is held for the whole duration of the worker's `start`.
            let mut guard = transcription_hybrid_whisper::SINGLETON_INSTANCE
                .lock()
                .unwrap();
            if let Some(worker) = guard.as_mut() {
                worker.start(whisper_stop_rx, use_no_vosk_queue_terminate_mode);
            }
        });
    }
}

/// Process-wide slot for the hybrid transcription instance.
///
/// Starts as `None`; filled by `initialize_transcription_hybrid` and reset to
/// `None` by `drop_transcription_hybrid`.
pub static SINGLETON_INSTANCE: Mutex<Option<TranscriptionHybrid>> = Mutex::new(None);

/// Fills `SINGLETON_INSTANCE` with a new `TranscriptionHybrid` if it is empty.
///
/// When an instance is already stored it is left untouched and the arguments are
/// dropped unused.
///
/// # Panics
///
/// Panics if the singleton mutex is poisoned.
pub fn initialize_transcription_hybrid(app_handle: AppHandle, note_id: u64) {
    let mut guard = SINGLETON_INSTANCE.lock().unwrap();
    // Only constructs the instance when the slot is currently `None`.
    let _ = guard.get_or_insert_with(|| TranscriptionHybrid::new(app_handle, note_id));
}

/// Clears `SINGLETON_INSTANCE` and then drops the three hybrid sub-singletons
/// (ReazonSpeech, Whisper, online), in that order.
///
/// The lock on `SINGLETON_INSTANCE` stays held until the function returns.
///
/// # Panics
///
/// Panics if the singleton mutex is poisoned.
pub fn drop_transcription_hybrid() {
    let mut guard = SINGLETON_INSTANCE.lock().unwrap();
    // Drop the stored instance (if any) right away, before the sub-singletons.
    drop(guard.take());
    transcription_hybrid_reazonspeech::drop_transcription_hybrid_reazonspeech();
    transcription_hybrid_whisper::drop_transcription_hybrid_whisper();
    transcription_hybrid_online::drop_transcription_hybrid_online();
}
Loading

0 comments on commit a0cb5cd

Please sign in to comment.