Skip to content

Commit

Permalink
Merge pull request #179 from solaoi/feature/add-reazonspeech
Browse files: browse the repository at this point in the history
reazonspeechモデルを追加
  • Loading branch information
solaoi authored Nov 4, 2024
2 parents 476c330 + ad0d193 commit 202066a
Show file tree
Hide file tree
Showing 24 changed files with 1,248 additions and 594 deletions.
442 changes: 322 additions & 120 deletions src-tauri/Cargo.lock

Large diffs are not rendered by default.

19 changes: 12 additions & 7 deletions src-tauri/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@ rust-version = "1.80"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[build-dependencies]
tauri-build = { version = "1.5.4", features = [] }
tauri-build = { version = "1.5.5", features = [] }

[dependencies]
serde_json = "1.0"
serde = { version = "1.0", features = ["derive"] }
tauri = { version = "1.7.2", features = ["api-all"] }
vosk = "0.1.0"
tauri = { version = "1.8.1", features = ["api-all"] }
vosk = "0.2.0"
cpal = "0.14.1"
dasp = "0.11"
unicode-segmentation = "1.11.0"
Expand All @@ -33,24 +33,29 @@ tokio = { version = "1", features = ["full"] }
futures-util = "0.3.30"
# serve audio
urlencoding = "2.1.3"
# screencapturekit = "0.2.7"
# screencapturekit = "0.2.8"
# fix thread crash
# screencapturekit = { git = "https://github.com/doom-fish/screencapturekit-rs", rev = "8829f70"}
screencapturekit = { git = "https://github.com/solaoi/screencapturekit-rs" }
xcap = "0.0.12"
xcap = "0.0.14"
# permission
macos-accessibility-client = "0.0.1"
core-graphics = "0.24.0"
objc = "0.2"
objc-foundation = "0.1"
objc_id = "0.1"
ct2rs = { version = "0.9.4", features = ["accelerate"] }
mistralrs = { git = "https://github.com/EricLBuehler/mistral.rs.git", rev = "32e8945", features = [
# mistralrs = { git = "https://github.com/EricLBuehler/mistral.rs.git", rev = "32e8945", features = [
# "metal",
# ] }
mistralrs = { git = "https://github.com/EricLBuehler/mistral.rs.git", tag = "v0.3.2", features = [
"metal",
] }
tauri-plugin-clipboard = "1.1.4"
sbv2_core = { version = "0.1.4", features = ["dynamic"] }
# whisper-rs = { version = "0.11.1", features = ["metal"] }
whisper-rs = { git = "https://github.com/tazz4843/whisper-rs.git", rev = "67924ca", features = ["metal"] }
whisper-rs = { git = "https://github.com/tazz4843/whisper-rs.git", rev = "dd0c3af", features = ["metal"] }
sherpa-rs = "0.5.1"

[dependencies.tauri-plugin-sql]
git = "https://github.com/tauri-apps/plugins-workspace"
Expand Down
4 changes: 4 additions & 0 deletions src-tauri/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,19 @@ fn main() {
"cargo:rustc-link-search=native={}",
lib_path.to_str().unwrap()
);

let is_release = match &*env_var("PROFILE") {
"debug" => false,
"release" => true,
_ => panic!("unexpected value set for PROFILE env"),
};

// for vosk and sbv2_core
if is_release {
println!("cargo:rustc-link-arg=-Wl,-rpath,@executable_path/../Resources/lib");
} else {
println!("cargo:rustc-link-arg=-Wl,-rpath,@executable_path/../../lib");
}

tauri_build::build()
}
Binary file added src-tauri/lib/libcargs.dylib
Binary file not shown.
Binary file added src-tauri/lib/libonnxruntime.1.17.1.dylib
Binary file not shown.
Binary file not shown.
Binary file added src-tauri/lib/libsherpa-onnx-c-api.dylib
Binary file not shown.
2 changes: 2 additions & 0 deletions src-tauri/migrations/001.sql
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,8 @@ VALUES("fugumt-ja-en", "fugumt-ja-en");
INSERT INTO models(model_name, model_type)
VALUES("honyaku-13b", "honyaku-13b");
INSERT INTO models(model_name, model_type)
VALUES("reazonspeech", "reazonspeech");
INSERT INTO models(model_name, model_type)
VALUES("style-bert-vits2", "style-bert-vits2");
INSERT INTO models(model_name, model_type)
VALUES("tsukuyomi-chan", "style-bert-vits2-voice");
Expand Down
44 changes: 40 additions & 4 deletions src-tauri/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,17 @@
)]

use tauri::{
http::{HttpRange, ResponseBuilder},
AppHandle, Manager, State, Window,
http::{HttpRange, ResponseBuilder}, AppHandle, Manager, PathResolver, State, Window
};
use tauri_plugin_sql::{Migration, MigrationKind};

use std::{
cmp::min, env, io::{Read, Seek, SeekFrom}, path::PathBuf, str::FromStr, sync::{Arc, Mutex}
cmp::min,
env,
io::{Read, Seek, SeekFrom},
path::PathBuf,
str::FromStr,
sync::{Arc, Mutex},
};

use crossbeam_channel::{unbounded, Sender};
Expand All @@ -37,6 +41,7 @@ use module::{
synthesizer::{self, Synthesizer},
transcription::{TraceCompletion, Transcription},
transcription_amivoice::TranscriptionAmivoice,
transcription_ja::TranscriptionJa,
transcription_online::TranscriptionOnline,
translation_en::TranslationEn,
translation_ja::TranslationJa,
Expand Down Expand Up @@ -133,6 +138,14 @@ fn download_honyaku13b_model_command(app_handle: AppHandle) {
});
}

/// Tauri command that starts downloading the `reazonspeech` model directory.
///
/// The work runs on a freshly spawned thread whose join handle is discarded, so the
/// command returns immediately and the download's outcome is not reported back to
/// the caller from here.
#[tauri::command]
fn download_reazonspeech_model_command(app_handle: AppHandle) {
    // NOTE(review): the second argument looks like the event name used to report
    // progress to the frontend — confirm against `ModelDirDownloader::download`.
    std::thread::spawn(move || {
        ModelDirDownloader::new(app_handle).download("reazonspeech", "downloadReazonSpeechProgress")
    });
}

#[tauri::command]
fn download_sbv2_command(app_handle: AppHandle) {
std::thread::spawn(move || {
Expand Down Expand Up @@ -165,7 +178,11 @@ fn list_app_windows_command(app_name: String) -> Vec<AppWindow> {
}

#[tauri::command]
async fn screenshot_command(app_handle: AppHandle, window_id: u32, note_id: u64) -> Result<bool, ()> {
async fn screenshot_command(
app_handle: AppHandle,
window_id: u32,
note_id: u64,
) -> Result<bool, ()> {
let result = screenshot::screenshot(window_id, note_id, app_handle);
Ok(result)
}
Expand Down Expand Up @@ -305,6 +322,9 @@ fn start_trace_command(
let mut translation_ja_high =
TranslationJaHigh::new(app_handle, speaker_language, note_id);
translation_ja_high.start(stop_convert_rx, true);
} else if transcription_accuracy.starts_with("reazonspeech") {
let mut transcription_ja = TranscriptionJa::new(app_handle, note_id);
transcription_ja.start(stop_convert_rx, true);
} else {
let mut transcription = Transcription::new(
app_handle,
Expand All @@ -329,6 +349,17 @@ fn stop_trace_command(state: State<'_, RecordState>, app_handle: AppHandle) {
}
}

/// Exports the location of the bundled ONNX Runtime dylib via `ORT_DYLIB_PATH`.
///
/// The library is looked up as the `lib/libonnxruntime.1.19.2.dylib` resource through
/// the given Tauri `PathResolver`; the resolved path is printed to stdout and then
/// stored in the process environment.
///
/// # Panics
///
/// Panics with "fail to resolve dynamic library path" if the resource path cannot be
/// resolved.
fn set_ort_env(path_resolver: &PathResolver) {
    const ORT_DYLIB_FILE: &str = "libonnxruntime.1.19.2.dylib";

    let resource = format!("lib/{}", ORT_DYLIB_FILE);
    let ort_dylib = path_resolver
        .resolve_resource(resource)
        .expect("fail to resolve dynamic library path");

    println!("dynamic lib: {}", ort_dylib.display());
    // NOTE(review): presumably read by the ONNX Runtime bindings when they load the
    // shared library at runtime — confirm against the crates that consume it.
    std::env::set_var("ORT_DYLIB_PATH", ort_dylib);
}

fn main() {
tauri::Builder::default()
.register_uri_scheme_protocol("stream", move |_app, request| {
Expand Down Expand Up @@ -387,6 +418,10 @@ fn main() {
)
.build(),
)
.setup(|app| {
set_ort_env(&app.path_resolver());
Ok(())
})
.manage(RecordState(Default::default()))
.manage(SynthesizeState(Default::default()))
.invoke_handler(tauri::generate_handler![
Expand All @@ -400,6 +435,7 @@ fn main() {
download_fugumt_enja_model_command,
download_fugumt_jaen_model_command,
download_honyaku13b_model_command,
download_reazonspeech_model_command,
download_sbv2_command,
download_sbv2_model_command,
list_devices_command,
Expand Down
1 change: 1 addition & 0 deletions src-tauri/src/module/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ mod sqlite;
pub mod synthesizer;
mod transcriber;
pub mod transcription;
pub mod transcription_ja;
pub mod transcription_amivoice;
pub mod transcription_online;
pub mod translation_en;
Expand Down
13 changes: 12 additions & 1 deletion src-tauri/src/module/record.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ use tauri::{api::path::data_dir, AppHandle, Manager};

use super::{
chat_online, recognizer::MyRecognizer, sqlite::Sqlite, transcription, transcription_amivoice,
transcription_online, translation_en, translation_ja, translation_ja_high, writer::Writer,
transcription_ja, transcription_online, translation_en, translation_ja, translation_ja_high,
writer::Writer,
};

pub struct Record {
Expand Down Expand Up @@ -248,6 +249,15 @@ impl Record {
if let Some(singleton) = lock.as_mut() {
singleton.start(stop_convert_rx_clone, false);
}
} else if transcription_accuracy_clone.starts_with("reazonspeech") {
transcription_ja::initialize_transcription_ja(
app_handle_clone,
note_id,
);
let mut lock = transcription_ja::SINGLETON_INSTANCE.lock().unwrap();
if let Some(singleton) = lock.as_mut() {
singleton.start(stop_convert_rx_clone, false);
}
} else {
transcription::initialize_transcription(
app_handle_clone,
Expand Down Expand Up @@ -284,6 +294,7 @@ impl Record {
if !is_no_transcription {
stop_convert_tx.send(()).unwrap();
transcription::drop_transcription();
transcription_ja::drop_transcription_ja();
translation_en::drop_translation_en();
translation_ja::drop_translation_ja();
translation_ja_high::drop_translation_ja_high();
Expand Down
13 changes: 11 additions & 2 deletions src-tauri/src/module/record_desktop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,7 @@ use screencapturekit::{
use vosk::Recognizer;

use super::{
chat_online, recognizer::MyRecognizer, sqlite::Sqlite, transcription, transcription_amivoice,
transcription_online, translation_en, translation_ja, translation_ja_high, writer::Writer,
chat_online, recognizer::MyRecognizer, sqlite::Sqlite, transcription, transcription_amivoice, transcription_ja, transcription_online, translation_en, translation_ja, translation_ja_high, writer::Writer
};

pub struct RecordDesktop {
Expand Down Expand Up @@ -280,6 +279,15 @@ impl RecordDesktop {
if let Some(singleton) = lock.as_mut() {
singleton.start(stop_convert_rx_clone, false);
}
} else if transcription_accuracy_clone.starts_with("reazonspeech") {
transcription_ja::initialize_transcription_ja(
app_handle_clone,
note_id,
);
let mut lock = transcription_ja::SINGLETON_INSTANCE.lock().unwrap();
if let Some(singleton) = lock.as_mut() {
singleton.start(stop_convert_rx_clone, false);
}
} else {
transcription::initialize_transcription(
app_handle_clone,
Expand Down Expand Up @@ -320,6 +328,7 @@ impl RecordDesktop {
if !is_no_transcription {
stop_convert_tx.send(()).unwrap();
transcription::drop_transcription();
transcription_ja::drop_transcription_ja();
translation_en::drop_translation_en();
translation_ja::drop_translation_ja();
translation_ja_high::drop_translation_ja_high();
Expand Down
20 changes: 20 additions & 0 deletions src-tauri/src/module/transcription_amivoice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,26 @@ impl TranscriptionAmivoice {
fn convert(&mut self) -> Result<(), rusqlite::Error> {
let vosk_speech = self.sqlite.select_vosk(self.note_id);
return vosk_speech.and_then(|speech| {
let reader = hound::WavReader::open(speech.wav.clone()).unwrap();

let spec = reader.spec();
let sample_rate = spec.sample_rate;
let is_too_short = (reader.duration() / sample_rate as u32) < 1;

if is_too_short {
println!("input is too short, so skipping...");
let mut updated = self
.sqlite
.update_model_vosk_to_whisper(speech.id, "".to_string())
.unwrap();
updated.content = speech.content;
self.app_handle
.clone()
.emit_all("finalTextConverted", updated)
.unwrap();
return Ok(());
}

let result = Self::request(
speech.wav,
self.token.clone(),
Expand Down
Loading

0 comments on commit 202066a

Please sign in to comment.