Skip to content

Commit

Permalink
Merge pull request #148 from solaoi/feature_amivoice
Browse files Browse the repository at this point in the history
Amivoice対応
  • Loading branch information
solaoi authored May 22, 2024
2 parents 63de899 + aa88b3f commit 671daef
Show file tree
Hide file tree
Showing 12 changed files with 328 additions and 15 deletions.
2 changes: 1 addition & 1 deletion src-tauri/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ unicode-segmentation = "1.9.0"
once_cell = "1.13.1"
crossbeam-channel = "0.5.6"
chrono = "0.4.22"
hound = "3.4.0"
hound = "3.5.1"
rusqlite = "*"
samplerate-rs = "0.1.0"
# download model and openai request
Expand Down
4 changes: 4 additions & 0 deletions src-tauri/migrations/001.sql
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ VALUES("transcriptionAccuracy", "off");
INSERT INTO settings(setting_name, setting_status)
VALUES("settingKeyOpenai", "");
INSERT INTO settings(setting_name, setting_status)
VALUES("settingKeyAmivoice", "");
INSERT INTO settings(setting_name, setting_status)
VALUES("settingLanguage", "日本語");
INSERT INTO settings(setting_name, setting_status)
VALUES("settingTemplate", "");
Expand All @@ -44,6 +46,8 @@ VALUES("settingResource", "");
INSERT INTO settings(setting_name, setting_status)
VALUES("settingModel", "gpt-3.5-turbo");
INSERT INTO settings(setting_name, setting_status)
VALUES("settingAmiVoiceModel", "general");
INSERT INTO settings(setting_name, setting_status)
VALUES("settingAILanguage", "None");
INSERT INTO settings(setting_name, setting_status)
VALUES(
Expand Down
7 changes: 7 additions & 0 deletions src-tauri/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ use module::{
screenshot::{self, AppWindow},
transcription::{TraceCompletion, Transcription},
transcription_online::TranscriptionOnline,
transcription_amivoice::TranscriptionAmivoice,
};

struct RecordState(Arc<Mutex<Option<Sender<()>>>>);
Expand Down Expand Up @@ -189,6 +190,12 @@ fn start_trace_command(
note_id,
);
transcription_online.start(stop_convert_rx, true);
} else if transcription_accuracy.starts_with("online-amivoice") {
let mut transcription_amivoice = TranscriptionAmivoice::new(
window.app_handle(),
note_id,
);
transcription_amivoice.start(stop_convert_rx, true);
} else if transcription_accuracy.starts_with("online-chat") {
let mut chat_online = ChatOnline::new(window.app_handle(), speaker_language, note_id);
chat_online.start(stop_convert_rx, true);
Expand Down
1 change: 1 addition & 0 deletions src-tauri/src/module/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ pub mod record_desktop;
mod sqlite;
mod transcriber;
pub mod transcription;
pub mod transcription_amivoice;
pub mod transcription_online;
mod writer;
pub mod screenshot;
15 changes: 13 additions & 2 deletions src-tauri/src/module/record.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ use crossbeam_channel::{unbounded, Receiver};
use tauri::{api::path::data_dir, AppHandle, Manager};

use super::{
chat_online, recognizer::MyRecognizer, sqlite::Sqlite, transcription, transcription_online,
writer::Writer,
chat_online, recognizer::MyRecognizer, sqlite::Sqlite, transcription, transcription_amivoice,
transcription_online, writer::Writer,
};

pub struct Record {
Expand Down Expand Up @@ -198,6 +198,16 @@ impl Record {
if let Some(singleton) = lock.as_mut() {
singleton.start(stop_convert_rx_clone, false);
}
} else if transcription_accuracy_clone.starts_with("online-amivoice") {
transcription_amivoice::initialize_transcription_amivoice(
app_handle_clone,
note_id,
);
let mut lock =
transcription_amivoice::SINGLETON_INSTANCE.lock().unwrap();
if let Some(singleton) = lock.as_mut() {
singleton.start(stop_convert_rx_clone, false);
}
} else if transcription_accuracy_clone.starts_with("online-chat") {
chat_online::initialize_chat_online(
app_handle_clone,
Expand Down Expand Up @@ -245,6 +255,7 @@ impl Record {
stop_convert_tx.send(()).unwrap();
transcription::drop_transcription();
transcription_online::drop_transcription_online();
transcription_amivoice::drop_transcription_amivoice();
chat_online::drop_chat_online();
} else {
drop(stop_convert_tx)
Expand Down
15 changes: 13 additions & 2 deletions src-tauri/src/module/record_desktop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ use screencapturekit::{
use vosk::Recognizer;

use super::{
chat_online, recognizer::MyRecognizer, sqlite::Sqlite, transcription, transcription_online,
writer::Writer,
chat_online, recognizer::MyRecognizer, sqlite::Sqlite, transcription, transcription_amivoice,
transcription_online, writer::Writer,
};

pub struct RecordDesktop {
Expand Down Expand Up @@ -230,6 +230,16 @@ impl RecordDesktop {
if let Some(singleton) = lock.as_mut() {
singleton.start(stop_convert_rx_clone, false);
}
} else if transcription_accuracy_clone.starts_with("online-amivoice") {
transcription_amivoice::initialize_transcription_amivoice(
app_handle_clone,
note_id,
);
let mut lock =
transcription_amivoice::SINGLETON_INSTANCE.lock().unwrap();
if let Some(singleton) = lock.as_mut() {
singleton.start(stop_convert_rx_clone, false);
}
} else if transcription_accuracy_clone.starts_with("online-chat") {
chat_online::initialize_chat_online(
app_handle_clone,
Expand Down Expand Up @@ -281,6 +291,7 @@ impl RecordDesktop {
stop_convert_tx.send(()).unwrap();
transcription::drop_transcription();
transcription_online::drop_transcription_online();
transcription_amivoice::drop_transcription_amivoice();
chat_online::drop_chat_online();
} else {
drop(stop_convert_tx)
Expand Down
16 changes: 16 additions & 0 deletions src-tauri/src/module/sqlite.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,14 @@ impl Sqlite {
);
}

/// Reads the AmiVoice API token stored under the `settingKeyAmivoice` setting.
///
/// # Errors
/// Returns the `rusqlite::Error` produced by the query, for example when no
/// matching row exists or the stored value cannot be read as text.
pub fn select_amivoice_token(&self) -> Result<String, rusqlite::Error> {
    self.conn.query_row(
        // The setting name is bound as a parameter instead of being written as a
        // double-quoted token, which SQLite parses as an identifier first.
        "SELECT setting_status FROM settings WHERE setting_name = ?1",
        params!["settingKeyAmivoice"],
        // `get` reports a conversion failure through the `Result` instead of panicking.
        |row| row.get(0),
    )
}

pub fn select_ai_language(&self) -> Result<String, rusqlite::Error> {
return self.conn.query_row(
"SELECT setting_status FROM settings WHERE setting_name = \"settingAILanguage\"",
Expand All @@ -100,6 +108,14 @@ impl Sqlite {
);
}

/// Reads the AmiVoice recognition model stored under the `settingAmiVoiceModel` setting.
///
/// # Errors
/// Returns the `rusqlite::Error` produced by the query, for example when no
/// matching row exists or the stored value cannot be read as text.
pub fn select_amivoice_model(&self) -> Result<String, rusqlite::Error> {
    self.conn.query_row(
        // The setting name is bound as a parameter instead of being written as a
        // double-quoted token, which SQLite parses as an identifier first.
        "SELECT setting_status FROM settings WHERE setting_name = ?1",
        params!["settingAmiVoiceModel"],
        // `get` reports a conversion failure through the `Result` instead of panicking.
        |row| row.get(0),
    )
}

pub fn select_ai_resource(&self) -> Result<String, rusqlite::Error> {
return self.conn.query_row(
"SELECT setting_status FROM settings WHERE setting_name = \"settingResource\"",
Expand Down
186 changes: 186 additions & 0 deletions src-tauri/src/module/transcription_amivoice.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
use hound::{SampleFormat, WavReader, WavSpec, WavWriter};
use tokio::{fs::File, io::AsyncReadExt};

use super::sqlite::Sqlite;

use crossbeam_channel::Receiver;

use reqwest::{multipart, Client};
use serde_json::Value;
use std::io::Cursor;
use std::sync::Mutex;
use tauri::{AppHandle, Manager};

/// Empty payload sent with the `traceCompletion` and `traceUnCompletion` events.
#[derive(Debug, Clone, serde::Serialize)]
pub struct TraceCompletion {}

pub struct TranscriptionAmivoice {
app_handle: AppHandle,
sqlite: Sqlite,
note_id: u64,
token: String,
model: String,
}

impl TranscriptionAmivoice {
pub fn new(app_handle: AppHandle, note_id: u64) -> Self {
let sqlite = Sqlite::new();
let token = sqlite.select_amivoice_token().unwrap();
let model = sqlite.select_amivoice_model().unwrap();
Self {
app_handle,
sqlite,
note_id,
token,
model,
}
}

pub fn start(&mut self, stop_convert_rx: Receiver<()>, is_continuous: bool) {
while Self::convert(self).is_ok() {
if is_continuous {
let vosk_speech = self.sqlite.select_vosk(self.note_id);
if vosk_speech.is_err() {
self.app_handle
.clone()
.emit_all("traceCompletion", TraceCompletion {})
.unwrap();
break;
}
}
if stop_convert_rx.try_recv().is_ok() {
let vosk_speech = self.sqlite.select_vosk(self.note_id);
if vosk_speech.is_err() {
self.app_handle
.clone()
.emit_all("traceCompletion", TraceCompletion {})
.unwrap();
} else {
self.app_handle
.clone()
.emit_all("traceUnCompletion", TraceCompletion {})
.unwrap();
}
break;
}
}
}

#[tokio::main]
async fn request(
file_path: String,
token: String,
model: String,
) -> Result<String, Box<dyn std::error::Error>> {
let url = "https://acp-api.amivoice.com/v1/nolog/recognize";
let client = Client::new();

let mut file = File::open(file_path).await?;
let mut buffer = Vec::new();
file.read_to_end(&mut buffer).await?;

let cursor = Cursor::new(buffer);
let mut reader = WavReader::new(cursor)?;
let spec = WavSpec {
channels: 1,
sample_rate: 48000,
bits_per_sample: 16,
sample_format: SampleFormat::Int,
};
let mut converted_buffer = Vec::new();
{
let mut cursor = Cursor::new(&mut converted_buffer);
let mut writer = WavWriter::new(&mut cursor, spec)?;

match reader.spec().sample_format {
SampleFormat::Int => {
for sample in reader.samples::<i32>() {
match sample {
Ok(sample) => {
let scaled_sample = (sample >> 16) as i16;
writer.write_sample(scaled_sample)?;
}
Err(e) => {
eprintln!("Error reading sample: {:?}", e);
}
}
}
}
SampleFormat::Float => {
for sample in reader.samples::<f32>() {
match sample {
Ok(sample) => {
let scaled_sample = (sample * i16::MAX as f32)
.clamp(i16::MIN as f32, i16::MAX as f32)
as i16;
writer.write_sample(scaled_sample)?;
}
Err(e) => {
eprintln!("Error reading sample: {:?}", e);
}
}
}
}
}
writer.finalize()?;
}

let part_file = multipart::Part::bytes(converted_buffer)
.file_name("test.wav")
.mime_str("audio/wav")?;
let part_model = multipart::Part::text(format!("grammarFileNames=-a-{}", model));
let part_token = multipart::Part::text(token.clone());

let form = multipart::Form::new()
.part("u", part_token)
.part("d", part_model)
.part("a", part_file);

let response = client.post(url).multipart(form).send().await?;

println!("Status: {}", response.status());
let json_response: Value = response.json().await?;
println!("Response: {:?}", json_response);
let response_text = json_response["results"][0]["text"]
.as_str()
.unwrap_or("text field not found");

Ok(response_text.to_string())
}

fn convert(&mut self) -> Result<(), rusqlite::Error> {
let vosk_speech = self.sqlite.select_vosk(self.note_id);
return vosk_speech.and_then(|speech| {
let result = Self::request(speech.wav, self.token.clone(), self.model.clone());

if result.is_ok() {
let updated = self
.sqlite
.update_model_vosk_to_whisper(speech.id, result.unwrap());

self.app_handle
.clone()
.emit_all("finalTextConverted", updated.unwrap())
.unwrap();
} else {
println!("amivoice api is temporally failed, so skipping...")
}

Ok(())
});
}
}

/// Shared slot holding the current `TranscriptionAmivoice`, or `None` when none is installed.
pub static SINGLETON_INSTANCE: Mutex<Option<TranscriptionAmivoice>> = Mutex::new(None);

/// Installs a new `TranscriptionAmivoice` in `SINGLETON_INSTANCE` unless one
/// is already present; an existing instance is left untouched.
///
/// # Panics
/// Panics when the mutex guarding the singleton is poisoned.
pub fn initialize_transcription_amivoice(app_handle: AppHandle, note_id: u64) {
    SINGLETON_INSTANCE
        .lock()
        .unwrap()
        // The closure only runs when the slot is empty.
        .get_or_insert_with(|| TranscriptionAmivoice::new(app_handle, note_id));
}

/// Empties `SINGLETON_INSTANCE`, dropping the stored instance if there is one.
///
/// # Panics
/// Panics when the mutex guarding the singleton is poisoned.
pub fn drop_transcription_amivoice() {
    let mut slot = SINGLETON_INSTANCE.lock().unwrap();
    drop(slot.take());
}
42 changes: 42 additions & 0 deletions src/components/molecules/SettingAmiVoiceModel.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import { ChangeEvent } from "react";
import { useRecoilState } from 'recoil';
import { settingKeyState } from "../../store/atoms/settingKeyState";

/**
 * Settings row with a dropdown for choosing the AmiVoice recognition model.
 * The chosen model id is kept in the `settingAmiVoiceModel` setting.
 */
const SettingAmiVoiceModel = (): JSX.Element => {
    const settingModels = ["general", "medgeneral", "bizmrreport", "bizfinance", "bizinsurance"] as const;

    // Maps a model id to the label shown in the dropdown.
    const modelNameMapper = (model: typeof settingModels[number]) => {
        switch (model) {
            case "general":
                return "汎用"
            case "medgeneral":
                return "医療"
            case "bizmrreport":
                return "製薬"
            case "bizfinance":
                return "金融"
            case "bizinsurance":
                return "保険"
        }
    }
    const [settingKey, setSettingKey] = useRecoilState(settingKeyState("settingAmiVoiceModel"))

    const change = (e: ChangeEvent<HTMLSelectElement>) => {
        setSettingKey(e.target.value)
    }

    return (
        <div className="flex items-center mb-2">
            <p className="w-[12rem]">利用モデル</p>
            <div className="flex flex-col w-full">
                {/* The selection is controlled through `value` on <select>; React warns when `selected` is set on <option>. */}
                <select className="select select-bordered focus:outline-none text-xs w-fit" name="setting-amivoice-model" value={settingKey} onChange={change} >
                    {settingModels.map((model) => (
                        <option key={model} value={model}>{modelNameMapper(model)}</option>
                    ))}
                </select>
            </div>
        </div>
    )
}

export { SettingAmiVoiceModel }
Loading

0 comments on commit 671daef

Please sign in to comment.