Skip to content

Commit

Permalink
Merge pull request #422 from chewing/rust/cdb-userphrase
Browse files (browse the repository at this point in the history)
feat(dict): implement CDB based user dictionary
  • Loading branch information
kanru authored Jan 6, 2024
2 parents d445946 + e522392 commit 080b261
Show file tree
Hide file tree
Showing 26 changed files with 1,324 additions and 678 deletions.
31 changes: 11 additions & 20 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,22 @@ jobs:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
with_rust: ['true', 'false']
with_hash: ['true', 'false']
rust_version: ['1.70']
include:
- os: ubuntu-latest
with_rust: true
rust_version: stable
with_hash: true
- os: ubuntu-latest
with_rust: true
rust_version: stable
with_hash: false
exclude:
- with_rust: 'false'
rust_version: 'stable'
runs-on: ${{ matrix.os }}
name: ${{ matrix.os }}, with_rust=${{ matrix.with_rust }}, rust_version=${{ matrix.rust_version }}
name: ${{ matrix.os }}, with_rust=${{ matrix.with_rust }}, with_hash=${{ matrix.with_hash }} rust_version=${{ matrix.rust_version }}

steps:
- uses: actions/checkout@v3
Expand All @@ -51,23 +57,7 @@ jobs:
rustup update
- name: Build
run: cargo xtask build --build-type ${{env.BUILD_TYPE}} --with-rust ${{matrix.with_rust}} --verbose true

- name: Test
run: cargo xtask test --build-type ${{env.BUILD_TYPE}}

build_with_hash:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
with:
submodules: 'true'

- run: sudo apt-get install ninja-build

- name: Build
run: cargo xtask build --build-type ${{env.BUILD_TYPE}} --with-rust false --with-hash true --verbose true
run: cargo xtask build --build-type ${{env.BUILD_TYPE}} --with-rust ${{matrix.with_rust}} --with-hash ${{matrix.with_hash}} --verbose true

- name: Test
run: cargo xtask test --build-type ${{env.BUILD_TYPE}}
Expand All @@ -76,8 +66,9 @@ jobs:
strategy:
matrix:
with_rust: ['true', 'false']
with_hash: ['true', 'false']
runs-on: ubuntu-latest
name: Coverage with_rust=${{ matrix.with_rust }}
name: Coverage with_rust=${{ matrix.with_rust }} with_hash=${{ matrix.with_hash }}

steps:
- uses: actions/checkout@v3
Expand All @@ -101,7 +92,7 @@ jobs:
- name: Build
env:
CC: clang
run: cargo xtask build --build-type Debug --with-rust ${{matrix.with_rust}} --with-coverage true --verbose true
run: cargo xtask build --build-type Debug --with-rust ${{matrix.with_rust}} --with-hash ${{matrix.with_hash}} --with-coverage true --verbose true

- name: Test
run: |
Expand Down
13 changes: 8 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,16 @@ option(USE_VALGRIND "Use valgrind when testing" true)
option(WITH_RUST "Use rust implemented internals (experimental)" false)
if(WITH_RUST)
add_subdirectory(cmake/corrosion)
if(CMAKE_BUILD_TYPE MATCHES DEBUG)
corrosion_import_crate(MANIFEST_PATH Cargo.toml CRATES chewing FEATURES capi test-tracing)
else()
corrosion_import_crate(MANIFEST_PATH Cargo.toml CRATES chewing FEATURES capi)
endif()
corrosion_import_crate(MANIFEST_PATH Cargo.toml CRATES chewing CRATE_TYPES staticlib FEATURES capi)
corrosion_import_crate(MANIFEST_PATH Cargo.toml CRATES chewing-tools)
add_compile_definitions(WITH_RUST)
if(CMAKE_BUILD_TYPE MATCHES Debug)
corrosion_set_features(chewing FEATURES test-tracing)
endif()
if(WITH_SQLITE3)
corrosion_set_features(chewing FEATURES sqlite)
endif()
corrosion_add_target_local_rustflags(chewing -Ccodegen-units=1)
if(ENABLE_GCOV)
corrosion_set_env_vars(chewing CARGO_INCREMENTAL=0)
corrosion_add_target_local_rustflags(chewing -Cinstrument-coverage -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=abort)
Expand Down
67 changes: 34 additions & 33 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 7 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ include = ["src/**/*.rs", "Cargo.toml", "AUTHORS", "COPYING", "NEWS"]

[dependencies]
bytemuck = { version = "1.14.0", features = ["derive"] }
cdb = { version = "0.6.0", git = "https://github.com/kanru/cdb-rs" }
directories = "5.0.0"
indexmap = "2.1.0"
riff = "2.0.0"
rusqlite = "0.30.0"
rusqlite = { version = "0.30.0", optional = true }
thiserror = "1.0.0"
tracing = { version = "0.1.40", features = [
"max_level_trace",
Expand All @@ -25,22 +25,25 @@ tracing-subscriber = { version = "0.3.18", features = [
], optional = true }

[target.'cfg(windows)'.dependencies]
rusqlite = { version = "0.30.0", features = ["bundled"] }
rusqlite = { version = "0.30.0", features = ["bundled"], optional = true }

[lib]
crate-type = ["lib", "staticlib"]
crate-type = ["rlib", "staticlib"]

[features]
default = []
capi = []
sqlite = ["rusqlite"]
test-tracing = ["tracing-subscriber"]

[dev-dependencies]
tempfile = "3"

[workspace]
members = ["tools", "xtask"]
resolver = "2"

[profile.release]
lto = true
debug = true
panic = "abort"
46 changes: 22 additions & 24 deletions src/capi/io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::{
cmp::min,
collections::BTreeMap,
ffi::{c_char, c_int, c_uint, c_ushort, c_void, CStr, CString},
mem,
iter, mem,
ptr::{null, null_mut},
slice, str,
sync::OnceLock,
Expand All @@ -20,17 +20,14 @@ use crate::{
types::{ChewingContext, SelKeys},
},
conversion::{ChewingEngine, Interval, Symbol},
dictionary::{
Dictionary, LayeredDictionary, SystemDictionaryLoader, UserDictionaryLoader,
UserFreqEstimateLoader,
},
dictionary::{LayeredDictionary, SystemDictionaryLoader, UserDictionaryLoader},
editor::{
keyboard::{AnyKeyboardLayout, KeyCode, KeyboardLayout, Modifiers, Qwerty},
syllable::{
DaiChien26, Et, Et26, GinYieh, Hsu, Ibm, KeyboardLayoutCompat, Pinyin, Standard,
},
BasicEditor, CharacterForm, Editor, EditorKeyBehavior, EditorOptions, LanguageMode,
SyllableEditor, UserPhraseAddDirection,
LaxUserFreqEstimate, SyllableEditor, UserPhraseAddDirection,
},
zhuyin::Syllable,
};
Expand Down Expand Up @@ -137,7 +134,7 @@ pub extern "C" fn chewing_new2(
.expect("invalid syspath string");
SystemDictionaryLoader::new().sys_path(search_path).load()
};
let mut dictionaries = match dictionaries {
let dictionaries = match dictionaries {
Some(d) => d,
None => return null_mut(),
};
Expand All @@ -155,24 +152,14 @@ pub extern "C" fn chewing_new2(
Some(d) => d,
None => return null_mut(),
};
dictionaries.insert(0, user_dictionary);

let estimate = if userpath.is_null() {
UserFreqEstimateLoader::new().load()
} else {
let data_path = unsafe { CStr::from_ptr(userpath) }
.to_str()
.expect("invalid syspath string");
UserFreqEstimateLoader::new()
.userphrase_path(data_path)
.load()
};
let estimate = LaxUserFreqEstimate::open(user_dictionary.as_ref());
let estimate = match estimate {
Some(d) => d,
None => return null_mut(),
Ok(d) => d,
Err(_) => return null_mut(),
};

let dict = LayeredDictionary::new(dictionaries, vec![]);
let dict = LayeredDictionary::new(dictionaries, user_dictionary);
let conversion_engine = ChewingEngine::new();
let kb_compat = KeyboardLayoutCompat::Default;
let keyboard = AnyKeyboardLayout::Qwerty(Qwerty);
Expand Down Expand Up @@ -737,7 +724,13 @@ pub extern "C" fn chewing_userphrase_enumerate(ctx: *mut ChewingContext) -> c_in
None => return -1,
};

ctx.userphrase_iter = Some(ctx.editor.user_dict().entries().peekable());
ctx.userphrase_iter = Some(
ctx.editor
.user_dict()
.entries()
.unwrap_or(Box::new(iter::empty()))
.peekable(),
);
0
}

Expand Down Expand Up @@ -924,9 +917,14 @@ pub extern "C" fn chewing_userphrase_lookup(
Some(phrase) => ctx
.editor
.user_dict()
.lookup_phrase(&syllables)
.lookup_all_phrases(&syllables)
.iter()
.any(|ph| ph.as_str() == phrase) as c_int,
None => (ctx.editor.user_dict().lookup_phrase(&syllables).count() > 0) as c_int,
None => ctx
.editor
.user_dict()
.lookup_first_phrase(&syllables)
.is_some() as c_int,
}
}

Expand Down
Loading

0 comments on commit 080b261

Please sign in to comment.