Skip to content

Commit

Permalink
Dependency upgrades. Performance improvement when searching for a newline
Browse files (browse the repository at this point in the history)
  • Loading branch information
Gal Ben David committed Jan 11, 2022
1 parent c08080a commit 897f19f
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 10 deletions.
9 changes: 5 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "pywordfreq"
version = "0.3.0"
version = "0.3.1"
authors = ["Gal Ben David <[email protected]>"]
edition = "2021"
description = "Word frequency checker based on Wikipedia corpus written in Rust"
Expand Down Expand Up @@ -36,12 +36,13 @@ crate-type = ["cdylib"]

[dependencies]
ahash = "0.7"
suffix = "1.2"
once_cell = "1.8"
flate2 = { version = "1", features = ["zlib-ng-compat"], default-features = false }
memchr = "2.4"
once_cell = "1.9"
suffix = "1.2"

[dependencies.pyo3]
version = "0.15.0"
version = "0.15.1"
features = ["extension-module"]

[profile.release]
Expand Down
3 changes: 1 addition & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,10 @@ sdist-include = [
"pywordfreq/*.py",
"pywordfreq/*.pyi"
]
# strip = true

[tool.poetry]
name = "pywordfreq"
version = "0.3.0"
version = "0.3.1"
authors = ["Gal Ben David <[email protected]>"]
description = "Word frequency checker based on Wikipedia corpus written in Rust"
readme = "README.md"
Expand Down
4 changes: 2 additions & 2 deletions pywordfreq/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def lazy_full_frequency(


def lazy_partial_frequency(
word,
pattern,
):
pywordfreq.load_dictionary(
importlib.resources.read_binary(
Expand All @@ -38,7 +38,7 @@ def lazy_partial_frequency(
full_frequency = pywordfreq.full_frequency
partial_frequency = pywordfreq.partial_frequency

return pywordfreq.partial_frequency(word)
return pywordfreq.partial_frequency(pattern)


full_frequency = lazy_full_frequency
Expand Down
8 changes: 6 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use ahash::{AHashMap, AHashSet};
use flate2::read::GzDecoder;
use once_cell::sync::Lazy;
use memchr::memmem;
use once_cell::sync::{Lazy, OnceCell};
use pyo3::prelude::*;
use pyo3::types::PyUnicode;
use std::io::prelude::*;
Expand All @@ -27,10 +28,13 @@ static mut FOUND_WORDS_START_INDEX: Lazy<AHashSet<usize>> = Lazy::new(
AHashSet::with_capacity(1000)
}
);
static NL_RFINDER: OnceCell<memmem::FinderRev> = OnceCell::new();


#[pymodule]
fn pywordfreq(_py: Python, m: &PyModule) -> PyResult<()> {
NL_RFINDER.set(memmem::FinderRev::new(b"\n")).unwrap();

#[pyfn(m)]
fn load_dictionary(
dictionary_compressed: &[u8],
Expand Down Expand Up @@ -96,7 +100,7 @@ fn pywordfreq(_py: Python, m: &PyModule) -> PyResult<()> {
let suffix_table_text = SUFFIX_TABLE.text();

for suffix_index in SUFFIX_TABLE.positions(word_lowered.as_str()) {
let start_index: usize = match suffix_table_text.get_unchecked(..*suffix_index as usize).rfind('\n') {
let start_index: usize = match NL_RFINDER.get_unchecked().rfind(suffix_table_text.get_unchecked(..*suffix_index as usize)) {
Some(start_index) => start_index + 1,
None => 0,
};
Expand Down

0 comments on commit 897f19f

Please sign in to comment.