From af1cd6658f57a3afa645adf455ed3be05100710e Mon Sep 17 00:00:00 2001 From: Kan-Ru Chen Date: Wed, 3 Jan 2024 22:30:39 +0900 Subject: [PATCH] refactor(dictionary): simplify layered dictionary --- Cargo.lock | 17 ------ Cargo.toml | 1 - src/capi/io.rs | 5 +- src/dictionary/layered.rs | 106 +++++++++++++++++--------------------- src/dictionary/mod.rs | 20 +------ src/editor/mod.rs | 18 +++---- 6 files changed, 58 insertions(+), 109 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5efe5510c..fdf95b1fb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -130,7 +130,6 @@ dependencies = [ "bytemuck", "cdb", "directories", - "indexmap", "riff", "rusqlite", "tempfile", @@ -171,12 +170,6 @@ dependencies = [ "windows-sys 0.48.0", ] -[[package]] -name = "equivalent" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" - [[package]] name = "errno" version = "0.3.8" @@ -245,16 +238,6 @@ dependencies = [ "hashbrown", ] -[[package]] -name = "indexmap" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f" -dependencies = [ - "equivalent", - "hashbrown", -] - [[package]] name = "lazy_static" version = "1.4.0" diff --git a/Cargo.toml b/Cargo.toml index ee85da5a2..2f74e96f9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,6 @@ include = ["src/**/*.rs", "Cargo.toml", "AUTHORS", "COPYING", "NEWS"] bytemuck = { version = "1.14.0", features = ["derive"] } cdb = "0.6.0" directories = "5.0.0" -indexmap = "2.1.0" riff = "2.0.0" rusqlite = "0.30.0" thiserror = "1.0.0" diff --git a/src/capi/io.rs b/src/capi/io.rs index e87eaa67b..21c01084a 100644 --- a/src/capi/io.rs +++ b/src/capi/io.rs @@ -136,7 +136,7 @@ pub extern "C" fn chewing_new2( .expect("invalid syspath string"); SystemDictionaryLoader::new().sys_path(search_path).load() }; - let mut dictionaries = match dictionaries { + let dictionaries = match dictionaries { Some(d) => d, None => return null_mut(), }; @@ -154,7 +154,6 @@ pub extern "C" fn chewing_new2( Some(d) => d, None => return null_mut(), }; - dictionaries.insert(0, user_dictionary); let estimate = if userpath.is_null() { UserFreqEstimateLoader::new().load() @@ -171,7 +170,7 @@ pub extern "C" fn chewing_new2( None => return null_mut(), }; - let dict = LayeredDictionary::new(dictionaries, vec![]); + let dict = LayeredDictionary::new(dictionaries, user_dictionary); let conversion_engine = ChewingEngine::new(); let kb_compat = KeyboardLayoutCompat::Default; let keyboard = AnyKeyboardLayout::Qwerty(Qwerty); diff --git a/src/dictionary/layered.rs b/src/dictionary/layered.rs index b427888d1..88ca1d63f 100644 --- a/src/dictionary/layered.rs +++ b/src/dictionary/layered.rs @@ -1,10 +1,13 @@ -use std::hash::{Hash, Hasher}; - -use indexmap::IndexSet; +use std::{ + cmp, + collections::{hash_map::Entry, HashMap}, + hash::{Hash, Hasher}, + iter, +}; use crate::zhuyin::SyllableSlice; -use super::{BlockList, DictEntries, Dictionary, DictionaryInfo, DictionaryUpdateError, Phrase}; +use super::{DictEntries, Dictionary, DictionaryInfo, DictionaryUpdateError, Phrase}; /// A collection of dictionaries that returns the union of the lookup results. /// # Examples @@ -28,7 +31,7 @@ use super::{BlockList, DictEntries, Dictionary, DictionaryInfo, DictionaryUpdate /// /// let user_block_list = Box::new(HashSet::from(["側".to_string()])); /// -/// let dict = LayeredDictionary::new(vec![sys_dict, user_dict], vec![user_block_list]); +/// let dict = LayeredDictionary::new(vec![sys_dict], user_dict, user_block_list); /// assert_eq!( /// [ /// ("策", 100).into(), @@ -48,27 +51,23 @@ use super::{BlockList, DictEntries, Dictionary, DictionaryInfo, DictionaryUpdate /// ``` #[derive(Debug)] pub struct LayeredDictionary { - inner: Vec>, - blocked: Vec>, + sys_dict: Vec>, + user_dict: Box, } impl LayeredDictionary { - /// Creates a new `LayeredDictionary` with the list of dictionaries and - /// block lists. + /// Creates a new `LayeredDictionary` with the list of dictionaries. pub fn new( - dictionaries: Vec>, - block_lists: Vec>, + sys_dict: Vec>, + user_dict: Box, ) -> LayeredDictionary { LayeredDictionary { - inner: dictionaries, - blocked: block_lists, + sys_dict, + user_dict, } } - fn is_blocked(&self, phrase: &str) -> bool { - self.blocked.iter().any(|b| b.is_blocked(phrase)) - } - pub fn base(&mut self) -> &mut dyn Dictionary { - self.inner[0].as_mut() + pub fn user_dict(&mut self) -> &mut dyn Dictionary { + self.user_dict.as_mut() } } @@ -81,38 +80,36 @@ impl Dictionary for LayeredDictionary { /// /// ```pseudo_code /// Set phrases = list() - /// Set [d_base, d_layers] = d_list - /// Foreach phrase, freq in d_base.lookup(syllables) - /// Add phrases <- (phrase, freq) /// Foreach d in d_layers - /// Foreach phrase, freq in d.lookup_syllables) + /// Foreach phrase, freq in d.lookup_syllables() /// If phrase in phrases - /// Set phrases[phrase].freq = freq + /// Set phrases[phrase].freq = max(phrases[phrase].freq, freq) /// Else /// Add phrases <- (phrase, freq) /// ``` fn lookup_first_n_phrases(&self, syllables: &dyn SyllableSlice, first: usize) -> Vec { - let (base, layers) = match self.inner.split_first() { - Some(d) => d, - None => return vec![], - }; - let mut phrases = IndexSet::with_capacity(128); - phrases.extend( - base.lookup_all_phrases(syllables) - .into_iter() - .map(LookupPhrase), - ); - for d in layers { - for phrase in d.lookup_all_phrases(syllables) { - phrases.replace(LookupPhrase(phrase)); - } - } + let mut sort_map: HashMap = HashMap::new(); + let mut phrases: Vec = Vec::new(); + + self.sys_dict + .iter() + .chain(iter::once(&self.user_dict)) + .for_each(|d| { + for phrase in d.lookup_all_phrases(syllables) { + match sort_map.entry(phrase.to_string()) { + Entry::Occupied(entry) => { + let index = *entry.get(); + phrases[index] = cmp::max(&phrase, &phrases[index]).clone(); + } + Entry::Vacant(entry) => { + entry.insert(phrases.len()); + phrases.push(phrase); + } + } + } + }); + phrases.truncate(first); phrases - .into_iter() - .map(|p| p.0) - .filter(|phrase| !self.is_blocked(&phrase.phrase)) - .take(first) - .collect() } fn entries(&self) -> Option { @@ -127,11 +124,11 @@ impl Dictionary for LayeredDictionary { } fn reopen(&mut self) -> Result<(), DictionaryUpdateError> { - self.inner.iter_mut().map(|it| it.reopen()).collect() + self.user_dict.reopen() } fn flush(&mut self) -> Result<(), DictionaryUpdateError> { - self.inner.iter_mut().map(|it| it.flush()).collect() + self.user_dict.flush() } fn add_phrase( @@ -139,11 +136,7 @@ impl Dictionary for LayeredDictionary { syllables: &dyn SyllableSlice, phrase: Phrase, ) -> Result<(), DictionaryUpdateError> { - for dict in &mut self.inner { - // TODO check mutability? - let _ = dict.add_phrase(syllables, phrase.clone()); - } - Ok(()) + self.user_dict.add_phrase(syllables, phrase) } fn update_phrase( @@ -153,11 +146,8 @@ impl Dictionary for LayeredDictionary { user_freq: u32, time: u64, ) -> Result<(), DictionaryUpdateError> { - for dict in &mut self.inner { - // TODO check mutability? - let _ = dict.update_phrase(syllables, phrase.clone(), user_freq, time); - } - Ok(()) + self.user_dict + .update_phrase(syllables, phrase, user_freq, time) } fn remove_phrase( @@ -165,11 +155,7 @@ impl Dictionary for LayeredDictionary { syllables: &dyn SyllableSlice, phrase_str: &str, ) -> Result<(), DictionaryUpdateError> { - for dict in &mut self.inner { - // TODO check mutability? - let _ = dict.remove_phrase(syllables, phrase_str); - } - Ok(()) + self.user_dict.remove_phrase(syllables, phrase_str) } } diff --git a/src/dictionary/mod.rs b/src/dictionary/mod.rs index 7765065dc..b7b62e0eb 100644 --- a/src/dictionary/mod.rs +++ b/src/dictionary/mod.rs @@ -4,7 +4,7 @@ use std::{ any::Any, borrow::Borrow, cmp::Ordering, - collections::{HashMap, HashSet}, + collections::HashMap, fmt::{Debug, Display}, path::Path, }; @@ -462,24 +462,6 @@ impl Dictionary for HashMap, Vec> { } } -/// A block list contains unwanted phrases. -pub trait BlockList: Debug { - /// Returns if whether a phrase is in the block list. - fn is_blocked(&self, phrase: &str) -> bool; -} - -impl BlockList for HashSet { - fn is_blocked(&self, phrase: &str) -> bool { - self.contains(phrase) - } -} - -impl BlockList for () { - fn is_blocked(&self, _phrase: &str) -> bool { - false - } -} - #[cfg(test)] mod tests { use std::collections::HashMap; diff --git a/src/editor/mod.rs b/src/editor/mod.rs index f62707f99..eb7007ee8 100644 --- a/src/editor/mod.rs +++ b/src/editor/mod.rs @@ -529,7 +529,7 @@ where C: ConversionEngine, { pub fn user_dict(&mut self) -> &mut dyn Dictionary { - self.dict.base() + self.dict.user_dict() } } @@ -554,8 +554,8 @@ where self.try_auto_commit(); } if self.dirty_dict { - let _ = self.user_dict().reopen(); - let _ = self.user_dict().flush(); + let _ = self.dict.reopen(); + let _ = self.dict.flush(); self.dirty_dict = false; } self.last_key_behavior() @@ -1293,7 +1293,7 @@ mod tests { #[test] fn editing_mode_input_bopomofo() { let keyboard = Qwerty; - let dict = LayeredDictionary::new(vec![Box::new(HashMap::new())], vec![]); + let dict = LayeredDictionary::new(vec![Box::new(HashMap::new())], Box::new(HashMap::new())); let conversion_engine = ChewingEngine::new(); let estimate = SqliteUserFreqEstimate::open_in_memory().unwrap(); let mut editor = Editor::new(conversion_engine, dict, estimate); @@ -1318,7 +1318,7 @@ mod tests { vec![crate::syl![Bopomofo::C, Bopomofo::E, Bopomofo::TONE4]], vec![("冊", 100).into()], )]); - let dict = LayeredDictionary::new(vec![Box::new(dict)], vec![]); + let dict = LayeredDictionary::new(vec![Box::new(dict)], Box::new(HashMap::new())); let conversion_engine = ChewingEngine::new(); let estimate = SqliteUserFreqEstimate::open_in_memory().unwrap(); let mut editor = Editor::new(conversion_engine, dict, estimate); @@ -1349,7 +1349,7 @@ mod tests { vec![crate::syl![Bopomofo::C, Bopomofo::E, Bopomofo::TONE4]], vec![("冊", 100).into()], )]); - let dict = LayeredDictionary::new(vec![Box::new(dict)], vec![]); + let dict = LayeredDictionary::new(vec![Box::new(dict)], Box::new(HashMap::new())); let conversion_engine = ChewingEngine::new(); let estimate = SqliteUserFreqEstimate::open_in_memory().unwrap(); let mut editor = Editor::new(conversion_engine, dict, estimate); @@ -1390,7 +1390,7 @@ mod tests { vec![crate::syl![Bopomofo::C, Bopomofo::E, Bopomofo::TONE4]], vec![("冊", 100).into()], )]); - let dict = LayeredDictionary::new(vec![Box::new(dict)], vec![]); + let dict = LayeredDictionary::new(vec![Box::new(dict)], Box::new(HashMap::new())); let conversion_engine = ChewingEngine::new(); let estimate = SqliteUserFreqEstimate::open_in_memory().unwrap(); let mut editor = Editor::new(conversion_engine, dict, estimate); @@ -1434,7 +1434,7 @@ mod tests { vec![crate::syl![Bopomofo::C, Bopomofo::E, Bopomofo::TONE4]], vec![("冊", 100).into()], )]); - let dict = LayeredDictionary::new(vec![Box::new(dict)], vec![]); + let dict = LayeredDictionary::new(vec![Box::new(dict)], Box::new(HashMap::new())); let conversion_engine = ChewingEngine::new(); let estimate = SqliteUserFreqEstimate::open_in_memory().unwrap(); let mut editor = Editor::new(conversion_engine, dict, estimate); @@ -1470,7 +1470,7 @@ mod tests { fn editing_mode_input_full_shape_symbol() { let keyboard = Qwerty; let dict = HashMap::new(); - let dict = LayeredDictionary::new(vec![Box::new(dict)], vec![]); + let dict = LayeredDictionary::new(vec![Box::new(dict)], Box::new(HashMap::new())); let conversion_engine = ChewingEngine::new(); let estimate = SqliteUserFreqEstimate::open_in_memory().unwrap(); let mut editor = Editor::new(conversion_engine, dict, estimate);