diff --git a/Cargo.toml b/Cargo.toml index e3c3e5a..314dbeb 100755 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pywordfreq" -version = "0.3.0" +version = "0.3.1" authors = ["Gal Ben David "] edition = "2021" description = "Word frequency checker based on Wikipedia corpus written in Rust" @@ -36,12 +36,13 @@ crate-type = ["cdylib"] [dependencies] ahash = "0.7" -suffix = "1.2" -once_cell = "1.8" flate2 = { version = "1", features = ["zlib-ng-compat"], default-features = false } +memchr = "2.4" +once_cell = "1.9" +suffix = "1.2" [dependencies.pyo3] -version = "0.15.0" +version = "0.15.1" features = ["extension-module"] [profile.release] diff --git a/pyproject.toml b/pyproject.toml index 63be7dc..88fa504 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,11 +9,10 @@ sdist-include = [ "pywordfreq/*.py", "pywordfreq/*.pyi" ] -# strip = true [tool.poetry] name = "pywordfreq" -version = "0.3.0" +version = "0.3.1" authors = ["Gal Ben David "] description = "Word frequency checker based on Wikipedia corpus written in Rust" readme = "README.md" diff --git a/pywordfreq/__init__.py b/pywordfreq/__init__.py index f2807a4..6e0668f 100755 --- a/pywordfreq/__init__.py +++ b/pywordfreq/__init__.py @@ -23,7 +23,7 @@ def lazy_full_frequency( def lazy_partial_frequency( - word, + pattern, ): pywordfreq.load_dictionary( importlib.resources.read_binary( @@ -38,7 +38,7 @@ def lazy_partial_frequency( full_frequency = pywordfreq.full_frequency partial_frequency = pywordfreq.partial_frequency - return pywordfreq.partial_frequency(word) + return pywordfreq.partial_frequency(pattern) full_frequency = lazy_full_frequency diff --git a/src/lib.rs b/src/lib.rs index 2e1d2ef..f1c029d 100755 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,7 @@ use ahash::{AHashMap, AHashSet}; use flate2::read::GzDecoder; -use once_cell::sync::Lazy; +use memchr::memmem; +use once_cell::sync::{Lazy, OnceCell}; use pyo3::prelude::*; use pyo3::types::PyUnicode; use std::io::prelude::*; @@ -27,10 +28,13 @@ static mut FOUND_WORDS_START_INDEX: Lazy> = Lazy::new( AHashSet::with_capacity(1000) } ); +static NL_RFINDER: OnceCell = OnceCell::new(); #[pymodule] fn pywordfreq(_py: Python, m: &PyModule) -> PyResult<()> { + NL_RFINDER.set(memmem::FinderRev::new(b"\n")).unwrap(); + #[pyfn(m)] fn load_dictionary( dictionary_compressed: &[u8], @@ -96,7 +100,7 @@ fn pywordfreq(_py: Python, m: &PyModule) -> PyResult<()> { let suffix_table_text = SUFFIX_TABLE.text(); for suffix_index in SUFFIX_TABLE.positions(word_lowered.as_str()) { - let start_index: usize = match suffix_table_text.get_unchecked(..*suffix_index as usize).rfind('\n') { + let start_index: usize = match NL_RFINDER.get_unchecked().rfind(suffix_table_text.get_unchecked(..*suffix_index as usize)) { Some(start_index) => start_index + 1, None => 0, };