diff --git a/Cargo.toml b/Cargo.toml index b62fe90..2676f58 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,4 +11,5 @@ tokio = { version = "1", features = ["rt-multi-thread"] } eframe = "0.22" egui = "0.22" font-kit = "0.11" -whisper_cli = "0.1" \ No newline at end of file +whisper_cli = "0.1" +clap_builder = "4" \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 3c59258..4257f65 100644 --- a/src/main.rs +++ b/src/main.rs @@ -13,7 +13,7 @@ fn main() { fn run() { let option = NativeOptions { icon_data: None, - initial_window_size: Some(Vec2::new(400.0, 300.0)), + initial_window_size: Some(Vec2::new(400.0, 400.0)), follow_system_theme: true, centered: true, resizable: false, diff --git a/src/ui.rs b/src/ui.rs index 7ae92fe..003c7b7 100644 --- a/src/ui.rs +++ b/src/ui.rs @@ -1,22 +1,33 @@ use std::path::PathBuf; use std::sync::{Arc, Mutex}; +use std::sync::atomic::Ordering; use std::thread; +use clap_builder::ValueEnum; use eframe::{CreationContext, Frame}; -use egui::{Context, FontId}; +use egui::{ComboBox, Context, FontId}; use egui::FontFamily::Proportional; use egui::TextStyle::*; use tokio::runtime::Runtime; +use whisper_cli::{Language, Size}; use crate::font::load_fonts; +use crate::utils::{ffmpeg_merge, MERGE, WHISPER, whisper}; -#[derive(Debug, Clone)] +#[derive(Clone)] pub struct Conv { rt: Arc, files: Arc>, + config: Config, +} + +#[derive(Clone)] +struct Config { + lang: Language, + size: Size, } #[derive(Debug, Clone, Default)] -struct Files { - audio: Option, +pub struct Files { + pub audio: Option, image: Option, subtitle: Option, } @@ -44,6 +55,7 @@ impl Conv { Self { rt: Arc::new(rt), files: Default::default(), + config: Config { lang: Language::Auto, size: Size::Medium }, } } @@ -83,40 +95,27 @@ impl eframe::App for Conv { ctx.request_repaint(); egui::CentralPanel::default().show(ctx, |ui| { - ui.vertical_centered(|ui| ui.heading("Conv")); - - ui.separator(); - - ui.horizontal(|ui| { - ui.label("选择音频"); - if 
ui.button("打开").clicked() { - Conv::open_audio(self.files.clone()); - } - }); + if ui.button("选择音频").clicked() { + Conv::open_audio(self.files.clone()); + } ui.label(format!("音频: {}", if let Some(ref p) = self.files.lock().unwrap().audio { p.to_str().unwrap() } else { "None" })); - ui.horizontal(|ui| { - ui.label("选择背景图片"); - if ui.button("打开").clicked() { - Conv::open_image(self.files.clone()); - } - }); + if ui.button("选择背景图片").clicked() { + Conv::open_image(self.files.clone()); + } ui.label(format!("背景图片: {}", if let Some(ref p) = self.files.lock().unwrap().image { p.to_str().unwrap() } else { "None" })); - ui.horizontal(|ui| { - ui.label("选择字幕"); - if ui.button("打开").clicked() { - Conv::open_subtitle(self.files.clone()); - } - }); + if ui.button("选择字幕").clicked() { + Conv::open_subtitle(self.files.clone()); + } ui.label(format!("字幕: {}", if let Some(ref p) = self.files.lock().unwrap().subtitle { p.to_str().unwrap() } else { @@ -125,7 +124,42 @@ impl eframe::App for Conv { ui.separator(); + ui.label("Whisper"); + ComboBox::from_label("语言") + .selected_text(<&str>::from(self.config.lang)) + .show_ui(ui, |ui| { + ui.style_mut().wrap = Some(false); + for i in Language::value_variants() { + ui.selectable_value(&mut self.config.lang, *i, <&str>::from(*i)); + } + }); + ComboBox::from_label("模型") + .selected_text(format!("{}", self.config.size)) + .show_ui(ui, |ui| { + ui.style_mut().wrap = Some(false); + for i in Size::value_variants() { + ui.selectable_value(&mut self.config.size, *i, format!("{}", *i)); + } + }); + + if ui.button("音频 -> 字幕").clicked() { + if !WHISPER.load(Ordering::Relaxed) { + if let Some(ref path) = self.files.lock().unwrap().audio { + whisper(self.rt.clone(), path.clone(), self.config.lang, self.config.size); + } + } + } + ui.label(if WHISPER.load(Ordering::Relaxed) { "转换中" } else { "转换结束" }); + + ui.separator(); + if ui.button("合并音频/图片/字幕").clicked() { + if !MERGE.load(Ordering::Relaxed) { + let file = self.files.lock().unwrap(); + 
ffmpeg_merge(file.audio.clone(), file.image.clone(), file.subtitle.clone()); + } + } + ui.label(if MERGE.load(Ordering::Relaxed) { "合并中" } else { "合并结束" }); }); } } \ No newline at end of file diff --git a/src/utils.rs b/src/utils.rs index ef20dfd..3844811 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,45 +1,90 @@ -use std::{env, fs::File, io::{BufRead, BufReader, Write}}; -use std::path::{Path, PathBuf}; +use std::{fs, fs::File, io::Write, thread}; +use std::path::PathBuf; +use std::process::Command; +use std::sync::Arc; +use std::sync::atomic::{AtomicBool, Ordering}; +use tokio::runtime::Runtime; +use whisper_cli::{Language, Model, Size, Transcript, Whisper}; -fn is_number(s: &str) -> bool { - s.chars().all(|c| c.is_digit(10)) -} - -// 将srt时间格式转换为lrc时间格式 -fn format_time(time: &str) -> String { - let msec = &time[9..12]; - let sec = &time[6..8]; - let min = &time[3..5]; +pub static WHISPER: AtomicBool = AtomicBool::new(false); +pub static MERGE: AtomicBool = AtomicBool::new(false); - format!("[{}:{}.{:.2}]", min, sec, msec) +fn as_lrc(t: &Transcript) -> String { + t.word_utterances + .as_ref() + .unwrap_or(&t.utterances) + .iter() + .fold(String::new(), |lrc, fragment| { + lrc + + format!( + "[{:02}:{:02}.{:02}]{}\n", + fragment.start / 100 / 60, /* whole minutes; `start` is treated as 1/100 s units */ + fragment.start / 100 % 60, /* seconds within the minute, so 61 s renders as 01:01 rather than 01:61 */ + fragment.start % 100, /* remaining hundredths of a second */ + fragment.text + ) + .as_str() + }) } -fn srt2lrc(path: &str) -> Result<(), Box> { - let output_filename = Path::new(path).with_extension("lrc"); - - let file = File::open(path)?; - let reader = BufReader::new(file); - let lines = reader.lines().filter_map(|l| l.ok()); - - let lrc_content = lines.fold(String::new(), |lrc, line| { - if is_number(&line) { - // 行号 - lrc - } else if line.is_empty() { - // 空行 - lrc - } else { - // 时间和文本内容 - let time = line.split_whitespace().nth(0).unwrap(); - let text = line.splitn(2, char::is_whitespace).nth(1).unwrap(); - - let lrc_time = format_time(time); - format!("{}{}\n", lrc, lrc_time + " " + text) +pub fn whisper(rt: Arc, path: 
PathBuf, lang: Language, size: Size) { + rt.spawn(async move { + WHISPER.store(true, Ordering::Relaxed); + let mut w = Whisper::new(Model::new(size), Some(lang)).await; + if let Ok(ref t) = w.transcribe(&path, false, false) { + let lrc = as_lrc(t); + let srt = t.as_srt(); + let path_lrc = path.with_extension("lrc"); + let path_srt = path.with_extension("srt"); + let mut file = File::create(path_lrc).unwrap(); + file.write_all(lrc.as_bytes()).unwrap(); + let mut file = File::create(path_srt).unwrap(); + file.write_all(srt.as_bytes()).unwrap(); } + WHISPER.store(false, Ordering::Relaxed); }); +} - let mut output_file = File::create(output_filename)?; - output_file.write_all(lrc_content.as_bytes())?; +pub fn ffmpeg_merge(audio: Option, image: Option, subtitle: Option) { + thread::spawn(move || { + MERGE.store(true, Ordering::Relaxed); + let mut cmd = Command::new("ffmpeg"); + if let Some(ref image) = image { + cmd.args([ + "-loop", + "1", + "-i", + image.to_str().unwrap(), + ]); + } + if let (Some(ref audio), Some(ref subtitle)) = (audio, subtitle) { + let output = audio.with_extension("mp4"); + if output.exists() { + fs::remove_file(output).unwrap_or(()); + } + cmd.args([ + "-i", + audio.to_str().unwrap(), + "-vf", + &format!("subtitles={}", subtitle.file_name().unwrap().to_str().unwrap()), + // "-c:v", + // "copy", + // "-c:a", + // "copy", + "-shortest", + audio.with_extension("mp4").to_str().unwrap(), + ]); + } else { + MERGE.store(false, Ordering::Relaxed); + return; + } + if let Ok(mut c) = cmd.spawn() { + if c.wait().is_err() { + MERGE.store(false, Ordering::Relaxed); + return; + } + } - Ok(()) -} + MERGE.store(false, Ordering::Relaxed); + }); +} \ No newline at end of file