Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update pyo3 to v0.23 #284

Merged
merged 3 commits on Dec 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion python/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ name = "sudachipy"
crate-type = ["cdylib"]

[dependencies]
pyo3 = { version = "0.22", features = ["extension-module"] }
pyo3 = { version = "0.23", features = ["extension-module"] }
scopeguard = "1" # Apache 2.0/MIT
thread_local = "1.1" # Apache 2.0/MIT

Expand Down
3 changes: 1 addition & 2 deletions python/build-wheels-manylinux-pgo.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,7 @@ export CARGO_BUILD_TARGET=x86_64-unknown-linux-gnu

# see following link for the list of cpython bin
# https://github.com/pypa/manylinux?tab=readme-ov-file#image-content
# TODO: after supporting py313t, "/opt/python/cp{37,38,39,310,311,312,313}-*/bin" would suffice.
for PYBIN in /opt/python/cp*-cp{37m,38,39,310,311,312,313}/bin; do
for PYBIN in /opt/python/cp{37,38,39,310,311,312,313}-*/bin; do
"${PYBIN}/pip" install -U setuptools wheel setuptools-rust
find . -iname 'sudachipy*.so'
rm -f build/lib/sudachipy/sudachipy*.so
Expand Down
17 changes: 5 additions & 12 deletions python/src/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use std::io::BufWriter;
use std::path::Path;

use pyo3::prelude::*;
use pyo3::types::{PyBytes, PyList, PyString, PyTuple, PyType};
use pyo3::types::{PyBytes, PyList, PyString, PyType};

use sudachi::analysis::stateless_tokenizer::DictionaryAccess;
use sudachi::config::Config;
Expand All @@ -36,18 +36,11 @@ pub fn register_functions(m: &Bound<PyModule>) -> PyResult<()> {
}

fn to_stats<T: DictionaryAccess>(py: Python, builder: DictBuilder<T>) -> PyResult<Bound<PyList>> {
let stats = PyList::empty_bound(py);
let stats = PyList::empty(py);

for p in builder.report() {
let t = PyTuple::new_bound(
py,
[
p.part().into_py(py),
p.size().into_py(py),
p.time().as_secs_f64().into_py(py),
],
);
stats.append(t)?;
let values = (p.part(), p.size(), p.time().as_secs_f64());
stats.append(values.into_pyobject(py)?)?;
}

Ok(stats)
Expand Down Expand Up @@ -174,7 +167,7 @@ fn resolve_as_pypathstr<'py>(
py: Python<'py>,
data: &Bound<'py, PyAny>,
) -> PyResult<Option<Bound<'py, PyString>>> {
let binding = py.import_bound("pathlib")?.getattr("Path")?;
let binding = py.import("pathlib")?.getattr("Path")?;
let path = binding.downcast::<PyType>()?;
if data.is_instance(path)? {
Ok(Some(data.call_method0("resolve")?.str()?))
Expand Down
34 changes: 14 additions & 20 deletions python/src/dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::sync::Arc;

use pyo3::ffi::c_str;
use pyo3::prelude::*;
use pyo3::types::{PySet, PyString, PyTuple};

Expand Down Expand Up @@ -160,7 +161,7 @@ impl PyDictionary {
if dict_type.is_some() {
errors::warn_deprecation(
py,
"Parameter dict_type of Dictionary() is deprecated, use dict instead",
c_str!("Parameter dict_type of Dictionary() is deprecated, use dict instead"),
)?
}

Expand Down Expand Up @@ -211,7 +212,9 @@ impl PyDictionary {
.pos_list
.iter()
.map(|pos| {
let tuple: Py<PyTuple> = PyTuple::new_bound(py, pos).into_py(py);
let tuple: Py<PyTuple> = PyTuple::new(py, pos)
.expect("failed to convert POS tuple")
.unbind();
tuple
})
.collect();
Expand Down Expand Up @@ -288,12 +291,8 @@ impl PyDictionary {
/// :param target: can be either a list of POS partial tuples or a callable which maps POS to bool.
///
/// :type target: Iterable[PartialPOS] | Callable[[POS], bool]
fn pos_matcher<'py>(
&'py self,
py: Python<'py>,
target: &Bound<'py, PyAny>,
) -> PyResult<PyPosMatcher> {
PyPosMatcher::create(py, self.dictionary.as_ref().unwrap(), target)
fn pos_matcher<'py>(&'py self, target: &Bound<'py, PyAny>) -> PyResult<PyPosMatcher> {
PyPosMatcher::create(self.dictionary.as_ref().unwrap(), target)
}

/// Creates HuggingFace Tokenizers-compatible PreTokenizer.
Expand Down Expand Up @@ -367,13 +366,12 @@ impl PyDictionary {
)
};

let internal = PyPretokenizer::new(dict, mode, required_fields, handler, projection);
let internal_cell = Bound::new(py, internal)?;
let module = py.import_bound("tokenizers.pre_tokenizers")?;
let pretokenizer = PyPretokenizer::new(dict, mode, required_fields, handler, projection);
let module = py.import("tokenizers.pre_tokenizers")?;
module
.getattr("PreTokenizer")?
.getattr("custom")?
.call1(PyTuple::new_bound(py, [internal_cell]))
.call1((pretokenizer,))
}

/// Look up morphemes in the binary dictionary without performing the analysis.
Expand Down Expand Up @@ -507,7 +505,7 @@ fn read_config(config_opt: &Bound<PyAny>) -> PyResult<ConfigBuilder> {
)));
}
let py = config_opt.py();
let cfg_type = py.import_bound("sudachipy.config")?.getattr("Config")?;
let cfg_type = py.import("sudachipy.config")?.getattr("Config")?;
if config_opt.is_instance(&cfg_type)? {
let cfg_as_str = config_opt.call_method0("as_jsons")?;
return read_config(&cfg_as_str);
Expand All @@ -520,24 +518,20 @@ fn read_config(config_opt: &Bound<PyAny>) -> PyResult<ConfigBuilder> {
}

pub(crate) fn read_default_config(py: Python) -> PyResult<ConfigBuilder> {
let path = py
.import_bound("sudachipy")?
.getattr("_DEFAULT_SETTINGFILE")?;
let path = py.import("sudachipy")?.getattr("_DEFAULT_SETTINGFILE")?;
let path = path.downcast::<PyString>()?.to_str()?;
let path = PathBuf::from(path);
errors::wrap_ctx(ConfigBuilder::from_opt_file(Some(&path)), &path)
}

pub(crate) fn get_default_resource_dir(py: Python) -> PyResult<PathBuf> {
let path = py
.import_bound("sudachipy")?
.getattr("_DEFAULT_RESOURCEDIR")?;
let path = py.import("sudachipy")?.getattr("_DEFAULT_RESOURCEDIR")?;
let path = path.downcast::<PyString>()?.to_str()?;
Ok(PathBuf::from(path))
}

fn find_dict_path(py: Python, dict_type: &str) -> PyResult<PathBuf> {
let pyfunc = py.import_bound("sudachipy")?.getattr("_find_dict_path")?;
let pyfunc = py.import("sudachipy")?.getattr("_find_dict_path")?;
let path = pyfunc.call1((dict_type,))?;
let path = path.downcast::<PyString>()?.to_str()?;
Ok(PathBuf::from(path))
Expand Down
5 changes: 3 additions & 2 deletions python/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
* limitations under the License.
*/

use core::ffi::CStr;
use std::fmt::{Debug, Display};

use pyo3::exceptions::PyDeprecationWarning;
Expand All @@ -37,6 +38,6 @@ pub fn wrap_ctx<T, E: Display, C: Debug + ?Sized>(v: Result<T, E>, ctx: &C) -> P
}
}

pub fn warn_deprecation(py: Python<'_>, msg: &str) -> PyResult<()> {
PyErr::warn_bound(py, &py.get_type_bound::<PyDeprecationWarning>(), msg, 1)
pub fn warn_deprecation(py: Python<'_>, msg: &CStr) -> PyResult<()> {
PyErr::warn(py, &py.get_type::<PyDeprecationWarning>(), msg, 1)
}
41 changes: 27 additions & 14 deletions python/src/morpheme.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use std::ops::Deref;
use std::sync::Arc;

use pyo3::exceptions::PyIndexError;
use pyo3::ffi::c_str;
use pyo3::prelude::*;
use pyo3::types::{PyList, PyString, PyTuple, PyType};

Expand Down Expand Up @@ -101,7 +102,7 @@ impl PyMorphemeListWrapper {
fn empty(_cls: &Bound<PyType>, py: Python, dict: &PyDictionary) -> PyResult<Self> {
errors::warn_deprecation(
py,
"Use Tokenizer.tokenize(\"\") if you need an empty MorphemeList.",
c_str!("Use Tokenizer.tokenize(\"\") if you need an empty MorphemeList."),
)?;

let cloned = dict.dictionary.as_ref().unwrap().clone();
Expand Down Expand Up @@ -165,7 +166,7 @@ impl PyMorphemeListWrapper {
result.push(' ');
}
}
PyString::new_bound(py, result.as_str())
PyString::new(py, result.as_str())
}

fn __repr__(slf: Py<PyMorphemeListWrapper>, py: Python) -> PyResult<Bound<PyString>> {
Expand All @@ -184,7 +185,7 @@ impl PyMorphemeListWrapper {
result.push_str(",\n");
}
result.push_str("]>");
Ok(PyString::new_bound(py, result.as_str()))
Ok(PyString::new(py, result.as_str()))
}

fn __iter__(slf: Py<Self>) -> PyMorphemeIter {
Expand Down Expand Up @@ -301,7 +302,7 @@ impl PyMorpheme {
let list = self.list(py);
let morph = self.morph(py);
match list.projection() {
None => PyString::new_bound(py, morph.surface().deref()),
None => PyString::new(py, morph.surface().deref()),
Some(proj) => proj.project(morph.deref(), py),
}
}
Expand All @@ -311,7 +312,7 @@ impl PyMorpheme {
/// See `Config.projection`.
#[pyo3(text_signature = "(self, /) -> str")]
fn raw_surface<'py>(&'py self, py: Python<'py>) -> Bound<'py, PyString> {
PyString::new_bound(py, self.morph(py).surface().deref())
PyString::new(py, self.morph(py).surface().deref())
}

/// Returns the part of speech as a six-element tuple.
Expand All @@ -334,20 +335,32 @@ impl PyMorpheme {

/// Returns the dictionary form.
#[pyo3(text_signature = "(self, /) -> str")]
fn dictionary_form<'py>(&'py self, py: Python<'py>) -> PyObject {
self.morph(py).get_word_info().dictionary_form().into_py(py)
fn dictionary_form<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<PyString>> {
Ok(self
.morph(py)
.get_word_info()
.dictionary_form()
.into_pyobject(py)?)
}

/// Returns the normalized form.
#[pyo3(text_signature = "(self, /) -> str")]
fn normalized_form<'py>(&'py self, py: Python<'py>) -> PyObject {
self.morph(py).get_word_info().normalized_form().into_py(py)
fn normalized_form<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<PyString>> {
Ok(self
.morph(py)
.get_word_info()
.normalized_form()
.into_pyobject(py)?)
}

/// Returns the reading form.
#[pyo3(text_signature = "(self, /) -> str")]
fn reading_form<'py>(&'py self, py: Python<'py>) -> PyObject {
self.morph(py).get_word_info().reading_form().into_py(py)
fn reading_form<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<PyString>> {
Ok(self
.morph(py)
.get_word_info()
.reading_form()
.into_pyobject(py)?)
}

/// Returns sub-morphemes in the provided split mode.
Expand Down Expand Up @@ -431,10 +444,10 @@ impl PyMorpheme {

/// Returns the list of synonym group ids.
#[pyo3(text_signature = "(self, /) -> List[int]")]
fn synonym_group_ids<'py>(&'py self, py: Python<'py>) -> Bound<PyList> {
fn synonym_group_ids<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<PyList>> {
let mref = self.morph(py);
let ids = mref.get_word_info().synonym_group_ids();
PyList::new_bound(py, ids)
PyList::new(py, ids)
}

/// Returns the word info.
Expand All @@ -443,7 +456,7 @@ impl PyMorpheme {
/// Users should not touch the raw WordInfo.
#[pyo3(text_signature = "(self, /) -> WordInfo")]
fn get_word_info(&self, py: Python) -> PyResult<PyWordInfo> {
errors::warn_deprecation(py, "Users should not touch the raw WordInfo.")?;
errors::warn_deprecation(py, c_str!("Users should not touch the raw WordInfo."))?;
Ok(self.morph(py).get_word_info().clone().into())
}

Expand Down
10 changes: 4 additions & 6 deletions python/src/pos_matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,23 +39,21 @@ pub struct PyPosMatcher {

impl PyPosMatcher {
pub(crate) fn create<'py>(
py: Python<'py>,
dic: &'py Arc<PyDicData>,
data: &Bound<'py, PyAny>,
) -> PyResult<PyPosMatcher> {
if data.is_callable() {
Self::create_from_fn(dic, data, py)
Self::create_from_fn(dic, data)
} else {
let iter = data.iter()?;
let iter = data.try_iter()?;
Self::create_from_iter(dic, &iter)
}
}

fn create_from_fn(dic: &Arc<PyDicData>, func: &Bound<PyAny>, py: Python) -> PyResult<Self> {
fn create_from_fn(dic: &Arc<PyDicData>, func: &Bound<PyAny>) -> PyResult<Self> {
let mut data = Vec::new();
for (pos_id, pos) in dic.pos.iter().enumerate() {
let args = PyTuple::new_bound(py, [pos]);
if func.call1(args)?.downcast::<PyBool>()?.is_true() {
if func.call1((pos,))?.downcast::<PyBool>()?.is_true() {
data.push(pos_id as u16);
}
}
Expand Down
21 changes: 9 additions & 12 deletions python/src/pretokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use std::sync::Arc;
use pyo3::intern;
use pyo3::prelude::*;
use pyo3::sync::GILOnceCell;
use pyo3::types::{PyList, PySlice, PyTuple, PyType};
use pyo3::types::{PyList, PySlice, PyType};
use thread_local::ThreadLocal;

use sudachi::analysis::stateful_tokenizer::StatefulTokenizer;
Expand Down Expand Up @@ -154,8 +154,7 @@ impl PyPretokenizer {
}
Some(h) => {
let mrp: &Bound<PyAny> = morphs.bind(py);
let args = PyTuple::new_bound(py, [index, string, mrp]);
h.bind(py).call1(args)
h.bind(py).call1((index, string, mrp))
}
}
}
Expand All @@ -166,7 +165,7 @@ impl PyPretokenizer {
py: Python<'py>,
data: &Bound<'py, PyAny>,
) -> PyResult<Bound<'py, PyAny>> {
data.call_method1(intern!(py, "split"), PyTuple::new_bound(py, [self_]))
data.call_method1(intern!(py, "split"), (self_,))
}
}

Expand All @@ -175,12 +174,11 @@ fn make_result_for_surface<'py>(
morphs: &PyMorphemeList,
string: &Bound<'py, PyAny>,
) -> PyResult<Bound<'py, PyList>> {
let result = PyList::empty_bound(py);
let result = PyList::empty(py);
for idx in 0..morphs.len() {
let node = morphs.get(idx);
let slice = PySlice::new_bound(py, node.begin_c() as isize, node.end_c() as isize, 1);
let args = PyTuple::new_bound(py, [slice]);
let substring = string.call_method1(intern!(py, "slice"), args)?;
let slice = PySlice::new(py, node.begin_c() as isize, node.end_c() as isize, 1);
let substring = string.call_method1(intern!(py, "slice"), (slice,))?;
result.append(substring)?;
}
Ok(result)
Expand All @@ -191,20 +189,19 @@ fn make_result_for_projection<'py>(
morphs: &PyMorphemeList,
proj: &dyn MorphemeProjection,
) -> PyResult<Bound<'py, PyList>> {
let result = PyList::empty_bound(py);
let result = PyList::empty(py);
let nstring = {
static NORMALIZED_STRING: GILOnceCell<Py<PyType>> = GILOnceCell::new();
NORMALIZED_STRING.get_or_try_init(py, || -> PyResult<Py<PyType>> {
let ns = py.import_bound("tokenizers")?.getattr("NormalizedString")?;
let ns = py.import("tokenizers")?.getattr("NormalizedString")?;
let tpe = ns.downcast::<PyType>()?;
Ok(tpe.clone().unbind())
})?
};
for idx in 0..morphs.len() {
let node = morphs.get(idx);
let value = proj.project(&node, py);
let args = PyTuple::new_bound(py, [value]);
let substring = nstring.call1(py, args)?;
let substring = nstring.call1(py, (value,))?;
result.append(substring)?;
}
Ok(result)
Expand Down
Loading
Loading