Skip to content

Commit

Permalink
refactor(dictionary): simplify layered dictionary
Browse files Browse the repository at this point in the history
  • Loading branch information
kanru committed Jan 3, 2024
1 parent 8769b91 commit af1cd66
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 109 deletions.
17 changes: 0 additions & 17 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ include = ["src/**/*.rs", "Cargo.toml", "AUTHORS", "COPYING", "NEWS"]
bytemuck = { version = "1.14.0", features = ["derive"] }
cdb = "0.6.0"
directories = "5.0.0"
indexmap = "2.1.0"
riff = "2.0.0"
rusqlite = "0.30.0"
thiserror = "1.0.0"
Expand Down
5 changes: 2 additions & 3 deletions src/capi/io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ pub extern "C" fn chewing_new2(
.expect("invalid syspath string");
SystemDictionaryLoader::new().sys_path(search_path).load()
};
let mut dictionaries = match dictionaries {
let dictionaries = match dictionaries {
Some(d) => d,
None => return null_mut(),
};
Expand All @@ -154,7 +154,6 @@ pub extern "C" fn chewing_new2(
Some(d) => d,
None => return null_mut(),
};
dictionaries.insert(0, user_dictionary);

let estimate = if userpath.is_null() {
UserFreqEstimateLoader::new().load()
Expand All @@ -171,7 +170,7 @@ pub extern "C" fn chewing_new2(
None => return null_mut(),
};

let dict = LayeredDictionary::new(dictionaries, vec![]);
let dict = LayeredDictionary::new(dictionaries, user_dictionary);
let conversion_engine = ChewingEngine::new();
let kb_compat = KeyboardLayoutCompat::Default;
let keyboard = AnyKeyboardLayout::Qwerty(Qwerty);
Expand Down
106 changes: 46 additions & 60 deletions src/dictionary/layered.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
use std::hash::{Hash, Hasher};

use indexmap::IndexSet;
use std::{
cmp,
collections::{hash_map::Entry, HashMap},
hash::{Hash, Hasher},
iter,
};

use crate::zhuyin::SyllableSlice;

use super::{BlockList, DictEntries, Dictionary, DictionaryInfo, DictionaryUpdateError, Phrase};
use super::{DictEntries, Dictionary, DictionaryInfo, DictionaryUpdateError, Phrase};

/// A collection of dictionaries that returns the union of the lookup results.
/// # Examples
Expand All @@ -28,7 +31,7 @@ use super::{BlockList, DictEntries, Dictionary, DictionaryInfo, DictionaryUpdate
///
/// let user_block_list = Box::new(HashSet::from(["側".to_string()]));
///
/// let dict = LayeredDictionary::new(vec![sys_dict, user_dict], vec![user_block_list]);
/// let dict = LayeredDictionary::new(vec![sys_dict], user_dict);
/// assert_eq!(
/// [
/// ("策", 100).into(),
Expand All @@ -48,27 +51,23 @@ use super::{BlockList, DictEntries, Dictionary, DictionaryInfo, DictionaryUpdate
/// ```
#[derive(Debug)]
pub struct LayeredDictionary {
inner: Vec<Box<dyn Dictionary>>,
blocked: Vec<Box<dyn BlockList>>,
sys_dict: Vec<Box<dyn Dictionary>>,
user_dict: Box<dyn Dictionary>,
}

impl LayeredDictionary {
/// Creates a new `LayeredDictionary` with the list of dictionaries and
/// block lists.
/// Creates a new `LayeredDictionary` with the list of dictionaries.
pub fn new(
dictionaries: Vec<Box<dyn Dictionary>>,
block_lists: Vec<Box<dyn BlockList>>,
sys_dict: Vec<Box<dyn Dictionary>>,
user_dict: Box<dyn Dictionary>,
) -> LayeredDictionary {
LayeredDictionary {
inner: dictionaries,
blocked: block_lists,
sys_dict,
user_dict,
}
}
fn is_blocked(&self, phrase: &str) -> bool {
self.blocked.iter().any(|b| b.is_blocked(phrase))
}
pub fn base(&mut self) -> &mut dyn Dictionary {
self.inner[0].as_mut()
pub fn user_dict(&mut self) -> &mut dyn Dictionary {
self.user_dict.as_mut()
}
}

Expand All @@ -81,38 +80,36 @@ impl Dictionary for LayeredDictionary {
///
/// ```pseudo_code
/// Set phrases = list()
/// Set [d_base, d_layers] = d_list
/// Foreach phrase, freq in d_base.lookup(syllables)
/// Add phrases <- (phrase, freq)
/// Foreach d in d_layers
/// Foreach phrase, freq in d.lookup_syllables)
/// Foreach phrase, freq in d.lookup_syllables()
/// If phrase in phrases
/// Set phrases[phrase].freq = freq
/// Set phrases[phrase].freq = max(phrases[phrase].freq, freq)
/// Else
/// Add phrases <- (phrase, freq)
/// ```
fn lookup_first_n_phrases(&self, syllables: &dyn SyllableSlice, first: usize) -> Vec<Phrase> {
let (base, layers) = match self.inner.split_first() {
Some(d) => d,
None => return vec![],
};
let mut phrases = IndexSet::with_capacity(128);
phrases.extend(
base.lookup_all_phrases(syllables)
.into_iter()
.map(LookupPhrase),
);
for d in layers {
for phrase in d.lookup_all_phrases(syllables) {
phrases.replace(LookupPhrase(phrase));
}
}
let mut sort_map: HashMap<String, usize> = HashMap::new();
let mut phrases: Vec<Phrase> = Vec::new();

self.sys_dict
.iter()
.chain(iter::once(&self.user_dict))
.for_each(|d| {
for phrase in d.lookup_all_phrases(syllables) {
match sort_map.entry(phrase.to_string()) {
Entry::Occupied(entry) => {
let index = *entry.get();
phrases[index] = cmp::max(&phrase, &phrases[index]).clone();
}
Entry::Vacant(entry) => {
entry.insert(phrases.len());
phrases.push(phrase);
}
}
}
});
phrases.truncate(first);
phrases
.into_iter()
.map(|p| p.0)
.filter(|phrase| !self.is_blocked(&phrase.phrase))
.take(first)
.collect()
}

fn entries(&self) -> Option<DictEntries> {
Expand All @@ -127,23 +124,19 @@ impl Dictionary for LayeredDictionary {
}

fn reopen(&mut self) -> Result<(), DictionaryUpdateError> {
self.inner.iter_mut().map(|it| it.reopen()).collect()
self.user_dict.reopen()
}

fn flush(&mut self) -> Result<(), DictionaryUpdateError> {
self.inner.iter_mut().map(|it| it.flush()).collect()
self.user_dict.flush()
}

fn add_phrase(
&mut self,
syllables: &dyn SyllableSlice,
phrase: Phrase,
) -> Result<(), DictionaryUpdateError> {
for dict in &mut self.inner {
// TODO check mutability?
let _ = dict.add_phrase(syllables, phrase.clone());
}
Ok(())
self.user_dict.add_phrase(syllables, phrase)
}

fn update_phrase(
Expand All @@ -153,23 +146,16 @@ impl Dictionary for LayeredDictionary {
user_freq: u32,
time: u64,
) -> Result<(), DictionaryUpdateError> {
for dict in &mut self.inner {
// TODO check mutability?
let _ = dict.update_phrase(syllables, phrase.clone(), user_freq, time);
}
Ok(())
self.user_dict
.update_phrase(syllables, phrase, user_freq, time)
}

fn remove_phrase(
&mut self,
syllables: &dyn SyllableSlice,
phrase_str: &str,
) -> Result<(), DictionaryUpdateError> {
for dict in &mut self.inner {
// TODO check mutability?
let _ = dict.remove_phrase(syllables, phrase_str);
}
Ok(())
self.user_dict.remove_phrase(syllables, phrase_str)
}
}

Expand Down
20 changes: 1 addition & 19 deletions src/dictionary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::{
any::Any,
borrow::Borrow,
cmp::Ordering,
collections::{HashMap, HashSet},
collections::HashMap,
fmt::{Debug, Display},
path::Path,
};
Expand Down Expand Up @@ -462,24 +462,6 @@ impl Dictionary for HashMap<Vec<Syllable>, Vec<Phrase>> {
}
}

/// A block list contains unwanted phrases.
pub trait BlockList: Debug {
/// Returns whether a phrase is in the block list.
fn is_blocked(&self, phrase: &str) -> bool;
}

impl BlockList for HashSet<String> {
fn is_blocked(&self, phrase: &str) -> bool {
self.contains(phrase)
}
}

impl BlockList for () {
fn is_blocked(&self, _phrase: &str) -> bool {
false
}
}

#[cfg(test)]
mod tests {
use std::collections::HashMap;
Expand Down
18 changes: 9 additions & 9 deletions src/editor/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,7 @@ where
C: ConversionEngine<LayeredDictionary>,
{
pub fn user_dict(&mut self) -> &mut dyn Dictionary {
self.dict.base()
self.dict.user_dict()
}
}

Expand All @@ -554,8 +554,8 @@ where
self.try_auto_commit();
}
if self.dirty_dict {
let _ = self.user_dict().reopen();
let _ = self.user_dict().flush();
let _ = self.dict.reopen();
let _ = self.dict.flush();
self.dirty_dict = false;
}
self.last_key_behavior()
Expand Down Expand Up @@ -1293,7 +1293,7 @@ mod tests {
#[test]
fn editing_mode_input_bopomofo() {
let keyboard = Qwerty;
let dict = LayeredDictionary::new(vec![Box::new(HashMap::new())], vec![]);
let dict = LayeredDictionary::new(vec![Box::new(HashMap::new())], Box::new(HashMap::new()));
let conversion_engine = ChewingEngine::new();
let estimate = SqliteUserFreqEstimate::open_in_memory().unwrap();
let mut editor = Editor::new(conversion_engine, dict, estimate);
Expand All @@ -1318,7 +1318,7 @@ mod tests {
vec![crate::syl![Bopomofo::C, Bopomofo::E, Bopomofo::TONE4]],
vec![("冊", 100).into()],
)]);
let dict = LayeredDictionary::new(vec![Box::new(dict)], vec![]);
let dict = LayeredDictionary::new(vec![Box::new(dict)], Box::new(HashMap::new()));
let conversion_engine = ChewingEngine::new();
let estimate = SqliteUserFreqEstimate::open_in_memory().unwrap();
let mut editor = Editor::new(conversion_engine, dict, estimate);
Expand Down Expand Up @@ -1349,7 +1349,7 @@ mod tests {
vec![crate::syl![Bopomofo::C, Bopomofo::E, Bopomofo::TONE4]],
vec![("冊", 100).into()],
)]);
let dict = LayeredDictionary::new(vec![Box::new(dict)], vec![]);
let dict = LayeredDictionary::new(vec![Box::new(dict)], Box::new(HashMap::new()));
let conversion_engine = ChewingEngine::new();
let estimate = SqliteUserFreqEstimate::open_in_memory().unwrap();
let mut editor = Editor::new(conversion_engine, dict, estimate);
Expand Down Expand Up @@ -1390,7 +1390,7 @@ mod tests {
vec![crate::syl![Bopomofo::C, Bopomofo::E, Bopomofo::TONE4]],
vec![("冊", 100).into()],
)]);
let dict = LayeredDictionary::new(vec![Box::new(dict)], vec![]);
let dict = LayeredDictionary::new(vec![Box::new(dict)], Box::new(HashMap::new()));
let conversion_engine = ChewingEngine::new();
let estimate = SqliteUserFreqEstimate::open_in_memory().unwrap();
let mut editor = Editor::new(conversion_engine, dict, estimate);
Expand Down Expand Up @@ -1434,7 +1434,7 @@ mod tests {
vec![crate::syl![Bopomofo::C, Bopomofo::E, Bopomofo::TONE4]],
vec![("冊", 100).into()],
)]);
let dict = LayeredDictionary::new(vec![Box::new(dict)], vec![]);
let dict = LayeredDictionary::new(vec![Box::new(dict)], Box::new(HashMap::new()));
let conversion_engine = ChewingEngine::new();
let estimate = SqliteUserFreqEstimate::open_in_memory().unwrap();
let mut editor = Editor::new(conversion_engine, dict, estimate);
Expand Down Expand Up @@ -1470,7 +1470,7 @@ mod tests {
fn editing_mode_input_full_shape_symbol() {
let keyboard = Qwerty;
let dict = HashMap::new();
let dict = LayeredDictionary::new(vec![Box::new(dict)], vec![]);
let dict = LayeredDictionary::new(vec![Box::new(dict)], Box::new(HashMap::new()));
let conversion_engine = ChewingEngine::new();
let estimate = SqliteUserFreqEstimate::open_in_memory().unwrap();
let mut editor = Editor::new(conversion_engine, dict, estimate);
Expand Down

0 comments on commit af1cd66

Please sign in to comment.