From 8a7ce734461a28e46fa601d91ae61d42d8bd7447 Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Tue, 3 Dec 2024 10:47:16 +0900 Subject: [PATCH 1/3] update pyo3 to v0.23 --- python/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/Cargo.toml b/python/Cargo.toml index 95b8b6ce..aff53759 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -15,7 +15,7 @@ name = "sudachipy" crate-type = ["cdylib"] [dependencies] -pyo3 = { version = "0.22", features = ["extension-module"] } +pyo3 = { version = "0.23", features = ["extension-module"] } scopeguard = "1" # Apache 2.0/MIT thread_local = "1.1" # Apache 2.0/MIT From 0a9e1983c01058735448fee78c1e1f87e0e301d1 Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Tue, 3 Dec 2024 17:07:06 +0900 Subject: [PATCH 2/3] rewrite deprecated pyo3 methods --- python/src/build.rs | 17 +++++----------- python/src/dictionary.rs | 34 +++++++++++++------------------ python/src/errors.rs | 5 +++-- python/src/morpheme.rs | 41 +++++++++++++++++++++++++------------- python/src/pos_matcher.rs | 10 ++++------ python/src/pretokenizer.rs | 21 +++++++++---------- python/src/projection.rs | 16 +++++++-------- 7 files changed, 70 insertions(+), 74 deletions(-) diff --git a/python/src/build.rs b/python/src/build.rs index 532eb1fa..fa8265ee 100644 --- a/python/src/build.rs +++ b/python/src/build.rs @@ -19,7 +19,7 @@ use std::io::BufWriter; use std::path::Path; use pyo3::prelude::*; -use pyo3::types::{PyBytes, PyList, PyString, PyTuple, PyType}; +use pyo3::types::{PyBytes, PyList, PyString, PyType}; use sudachi::analysis::stateless_tokenizer::DictionaryAccess; use sudachi::config::Config; @@ -36,18 +36,11 @@ pub fn register_functions(m: &Bound) -> PyResult<()> { } fn to_stats(py: Python, builder: DictBuilder) -> PyResult> { - let stats = PyList::empty_bound(py); + let stats = PyList::empty(py); for p in builder.report() { - let t = PyTuple::new_bound( - py, - [ - p.part().into_py(py), - p.size().into_py(py), - p.time().as_secs_f64().into_py(py), - ], - ); - stats.append(t)?; + let values = (p.part(), p.size(), p.time().as_secs_f64()); + stats.append(values.into_pyobject(py)?)?; } Ok(stats) @@ -174,7 +167,7 @@ fn resolve_as_pypathstr<'py>( py: Python<'py>, data: &Bound<'py, PyAny>, ) -> PyResult>> { - let binding = py.import_bound("pathlib")?.getattr("Path")?; + let binding = py.import("pathlib")?.getattr("Path")?; let path = binding.downcast::()?; if data.is_instance(path)? { Ok(Some(data.call_method0("resolve")?.str()?)) diff --git a/python/src/dictionary.rs b/python/src/dictionary.rs index 5e0e6904..e0105fc8 100644 --- a/python/src/dictionary.rs +++ b/python/src/dictionary.rs @@ -20,6 +20,7 @@ use std::path::{Path, PathBuf}; use std::str::FromStr; use std::sync::Arc; +use pyo3::ffi::c_str; use pyo3::prelude::*; use pyo3::types::{PySet, PyString, PyTuple}; @@ -160,7 +161,7 @@ impl PyDictionary { if dict_type.is_some() { errors::warn_deprecation( py, - "Parameter dict_type of Dictionary() is deprecated, use dict instead", + c_str!("Parameter dict_type of Dictionary() is deprecated, use dict instead"), )? } @@ -211,7 +212,9 @@ impl PyDictionary { .pos_list .iter() .map(|pos| { - let tuple: Py = PyTuple::new_bound(py, pos).into_py(py); + let tuple: Py = PyTuple::new(py, pos) + .expect("failed to convert POS tuple") + .unbind(); tuple }) .collect(); @@ -288,12 +291,8 @@ impl PyDictionary { /// :param target: can be either a list of POS partial tuples or a callable which maps POS to bool. /// /// :type target: Iterable[PartialPOS] | Callable[[POS], bool] - fn pos_matcher<'py>( - &'py self, - py: Python<'py>, - target: &Bound<'py, PyAny>, - ) -> PyResult { - PyPosMatcher::create(py, self.dictionary.as_ref().unwrap(), target) + fn pos_matcher<'py>(&'py self, target: &Bound<'py, PyAny>) -> PyResult { + PyPosMatcher::create(self.dictionary.as_ref().unwrap(), target) } /// Creates HuggingFace Tokenizers-compatible PreTokenizer. @@ -367,13 +366,12 @@ impl PyDictionary { ) }; - let internal = PyPretokenizer::new(dict, mode, required_fields, handler, projection); - let internal_cell = Bound::new(py, internal)?; - let module = py.import_bound("tokenizers.pre_tokenizers")?; + let pretokenizer = PyPretokenizer::new(dict, mode, required_fields, handler, projection); + let module = py.import("tokenizers.pre_tokenizers")?; module .getattr("PreTokenizer")? .getattr("custom")? - .call1(PyTuple::new_bound(py, [internal_cell])) + .call1((pretokenizer,)) } /// Look up morphemes in the binary dictionary without performing the analysis. @@ -507,7 +505,7 @@ fn read_config(config_opt: &Bound) -> PyResult { ))); } let py = config_opt.py(); - let cfg_type = py.import_bound("sudachipy.config")?.getattr("Config")?; + let cfg_type = py.import("sudachipy.config")?.getattr("Config")?; if config_opt.is_instance(&cfg_type)? { let cfg_as_str = config_opt.call_method0("as_jsons")?; return read_config(&cfg_as_str); @@ -520,24 +518,20 @@ fn read_config(config_opt: &Bound) -> PyResult { } pub(crate) fn read_default_config(py: Python) -> PyResult { - let path = py - .import_bound("sudachipy")? - .getattr("_DEFAULT_SETTINGFILE")?; + let path = py.import("sudachipy")?.getattr("_DEFAULT_SETTINGFILE")?; let path = path.downcast::()?.to_str()?; let path = PathBuf::from(path); errors::wrap_ctx(ConfigBuilder::from_opt_file(Some(&path)), &path) } pub(crate) fn get_default_resource_dir(py: Python) -> PyResult { - let path = py - .import_bound("sudachipy")? - .getattr("_DEFAULT_RESOURCEDIR")?; + let path = py.import("sudachipy")?.getattr("_DEFAULT_RESOURCEDIR")?; let path = path.downcast::()?.to_str()?; Ok(PathBuf::from(path)) } fn find_dict_path(py: Python, dict_type: &str) -> PyResult { - let pyfunc = py.import_bound("sudachipy")?.getattr("_find_dict_path")?; + let pyfunc = py.import("sudachipy")?.getattr("_find_dict_path")?; let path = pyfunc.call1((dict_type,))?; let path = path.downcast::()?.to_str()?; Ok(PathBuf::from(path)) diff --git a/python/src/errors.rs b/python/src/errors.rs index a049c17d..93081f89 100644 --- a/python/src/errors.rs +++ b/python/src/errors.rs @@ -14,6 +14,7 @@ * limitations under the License. */ +use core::ffi::CStr; use std::fmt::{Debug, Display}; use pyo3::exceptions::PyDeprecationWarning; @@ -37,6 +38,6 @@ pub fn wrap_ctx(v: Result, ctx: &C) -> P } } -pub fn warn_deprecation(py: Python<'_>, msg: &str) -> PyResult<()> { - PyErr::warn_bound(py, &py.get_type_bound::(), msg, 1) +pub fn warn_deprecation(py: Python<'_>, msg: &CStr) -> PyResult<()> { + PyErr::warn(py, &py.get_type::(), msg, 1) } diff --git a/python/src/morpheme.rs b/python/src/morpheme.rs index 49e854a3..ebd37d72 100644 --- a/python/src/morpheme.rs +++ b/python/src/morpheme.rs @@ -19,6 +19,7 @@ use std::ops::Deref; use std::sync::Arc; use pyo3::exceptions::PyIndexError; +use pyo3::ffi::c_str; use pyo3::prelude::*; use pyo3::types::{PyList, PyString, PyTuple, PyType}; @@ -101,7 +102,7 @@ impl PyMorphemeListWrapper { fn empty(_cls: &Bound, py: Python, dict: &PyDictionary) -> PyResult { errors::warn_deprecation( py, - "Use Tokenizer.tokenize(\"\") if you need an empty MorphemeList.", + c_str!("Use Tokenizer.tokenize(\"\") if you need an empty MorphemeList."), )?; let cloned = dict.dictionary.as_ref().unwrap().clone(); @@ -165,7 +166,7 @@ impl PyMorphemeListWrapper { result.push(' '); } } - PyString::new_bound(py, result.as_str()) + PyString::new(py, result.as_str()) } fn __repr__(slf: Py, py: Python) -> PyResult> { @@ -184,7 +185,7 @@ impl PyMorphemeListWrapper { result.push_str(",\n"); } result.push_str("]>"); - Ok(PyString::new_bound(py, result.as_str())) + Ok(PyString::new(py, result.as_str())) } fn __iter__(slf: Py) -> PyMorphemeIter { @@ -301,7 +302,7 @@ impl PyMorpheme { let list = self.list(py); let morph = self.morph(py); match list.projection() { - None => PyString::new_bound(py, morph.surface().deref()), + None => PyString::new(py, morph.surface().deref()), Some(proj) => proj.project(morph.deref(), py), } } @@ -311,7 +312,7 @@ impl PyMorpheme { /// See `Config.projection`. #[pyo3(text_signature = "(self, /) -> str")] fn raw_surface<'py>(&'py self, py: Python<'py>) -> Bound<'py, PyString> { - PyString::new_bound(py, self.morph(py).surface().deref()) + PyString::new(py, self.morph(py).surface().deref()) } /// Returns the part of speech as a six-element tuple. @@ -334,20 +335,32 @@ impl PyMorpheme { /// Returns the dictionary form. #[pyo3(text_signature = "(self, /) -> str")] - fn dictionary_form<'py>(&'py self, py: Python<'py>) -> PyObject { - self.morph(py).get_word_info().dictionary_form().into_py(py) + fn dictionary_form<'py>(&'py self, py: Python<'py>) -> PyResult> { + Ok(self + .morph(py) + .get_word_info() + .dictionary_form() + .into_pyobject(py)?) } /// Returns the normalized form. #[pyo3(text_signature = "(self, /) -> str")] - fn normalized_form<'py>(&'py self, py: Python<'py>) -> PyObject { - self.morph(py).get_word_info().normalized_form().into_py(py) + fn normalized_form<'py>(&'py self, py: Python<'py>) -> PyResult> { + Ok(self + .morph(py) + .get_word_info() + .normalized_form() + .into_pyobject(py)?) } /// Returns the reading form. #[pyo3(text_signature = "(self, /) -> str")] - fn reading_form<'py>(&'py self, py: Python<'py>) -> PyObject { - self.morph(py).get_word_info().reading_form().into_py(py) + fn reading_form<'py>(&'py self, py: Python<'py>) -> PyResult> { + Ok(self + .morph(py) + .get_word_info() + .reading_form() + .into_pyobject(py)?) } /// Returns sub-morphemes in the provided split mode. @@ -431,10 +444,10 @@ impl PyMorpheme { /// Returns the list of synonym group ids. #[pyo3(text_signature = "(self, /) -> List[int]")] - fn synonym_group_ids<'py>(&'py self, py: Python<'py>) -> Bound { + fn synonym_group_ids<'py>(&'py self, py: Python<'py>) -> PyResult> { let mref = self.morph(py); let ids = mref.get_word_info().synonym_group_ids(); - PyList::new_bound(py, ids) + PyList::new(py, ids) } /// Returns the word info. @@ -443,7 +456,7 @@ impl PyMorpheme { /// Users should not touch the raw WordInfo. #[pyo3(text_signature = "(self, /) -> WordInfo")] fn get_word_info(&self, py: Python) -> PyResult { - errors::warn_deprecation(py, "Users should not touch the raw WordInfo.")?; + errors::warn_deprecation(py, c_str!("Users should not touch the raw WordInfo."))?; Ok(self.morph(py).get_word_info().clone().into()) } diff --git a/python/src/pos_matcher.rs b/python/src/pos_matcher.rs index b8648517..220d5120 100644 --- a/python/src/pos_matcher.rs +++ b/python/src/pos_matcher.rs @@ -39,23 +39,21 @@ pub struct PyPosMatcher { impl PyPosMatcher { pub(crate) fn create<'py>( - py: Python<'py>, dic: &'py Arc, data: &Bound<'py, PyAny>, ) -> PyResult { if data.is_callable() { - Self::create_from_fn(dic, data, py) + Self::create_from_fn(dic, data) } else { - let iter = data.iter()?; + let iter = data.try_iter()?; Self::create_from_iter(dic, &iter) } } - fn create_from_fn(dic: &Arc, func: &Bound, py: Python) -> PyResult { + fn create_from_fn(dic: &Arc, func: &Bound) -> PyResult { let mut data = Vec::new(); for (pos_id, pos) in dic.pos.iter().enumerate() { - let args = PyTuple::new_bound(py, [pos]); - if func.call1(args)?.downcast::()?.is_true() { + if func.call1((pos,))?.downcast::()?.is_true() { data.push(pos_id as u16); } } diff --git a/python/src/pretokenizer.rs b/python/src/pretokenizer.rs index 8c4ee3f0..67e19b89 100644 --- a/python/src/pretokenizer.rs +++ b/python/src/pretokenizer.rs @@ -20,7 +20,7 @@ use std::sync::Arc; use pyo3::intern; use pyo3::prelude::*; use pyo3::sync::GILOnceCell; -use pyo3::types::{PyList, PySlice, PyTuple, PyType}; +use pyo3::types::{PyList, PySlice, PyType}; use thread_local::ThreadLocal; use sudachi::analysis::stateful_tokenizer::StatefulTokenizer; @@ -154,8 +154,7 @@ impl PyPretokenizer { } Some(h) => { let mrp: &Bound = morphs.bind(py); - let args = PyTuple::new_bound(py, [index, string, mrp]); - h.bind(py).call1(args) + h.bind(py).call1((index, string, mrp)) } } } @@ -166,7 +165,7 @@ impl PyPretokenizer { py: Python<'py>, data: &Bound<'py, PyAny>, ) -> PyResult> { - data.call_method1(intern!(py, "split"), PyTuple::new_bound(py, [self_])) + data.call_method1(intern!(py, "split"), (self_,)) } } @@ -175,12 +174,11 @@ fn make_result_for_surface<'py>( morphs: &PyMorphemeList, string: &Bound<'py, PyAny>, ) -> PyResult> { - let result = PyList::empty_bound(py); + let result = PyList::empty(py); for idx in 0..morphs.len() { let node = morphs.get(idx); - let slice = PySlice::new_bound(py, node.begin_c() as isize, node.end_c() as isize, 1); - let args = PyTuple::new_bound(py, [slice]); - let substring = string.call_method1(intern!(py, "slice"), args)?; + let slice = PySlice::new(py, node.begin_c() as isize, node.end_c() as isize, 1); + let substring = string.call_method1(intern!(py, "slice"), (slice,))?; result.append(substring)?; } Ok(result) @@ -191,11 +189,11 @@ fn make_result_for_projection<'py>( morphs: &PyMorphemeList, proj: &dyn MorphemeProjection, ) -> PyResult> { - let result = PyList::empty_bound(py); + let result = PyList::empty(py); let nstring = { static NORMALIZED_STRING: GILOnceCell> = GILOnceCell::new(); NORMALIZED_STRING.get_or_try_init(py, || -> PyResult> { - let ns = py.import_bound("tokenizers")?.getattr("NormalizedString")?; + let ns = py.import("tokenizers")?.getattr("NormalizedString")?; let tpe = ns.downcast::()?; Ok(tpe.clone().unbind()) })? @@ -203,8 +201,7 @@ fn make_result_for_projection<'py>( for idx in 0..morphs.len() { let node = morphs.get(idx); let value = proj.project(&node, py); - let args = PyTuple::new_bound(py, [value]); - let substring = nstring.call1(py, args)?; + let substring = nstring.call1(py, (value,))?; result.append(substring)?; } Ok(result) diff --git a/python/src/projection.rs b/python/src/projection.rs index 3075236f..bda1d55d 100644 --- a/python/src/projection.rs +++ b/python/src/projection.rs @@ -36,7 +36,7 @@ struct Surface {} impl MorphemeProjection for Surface { fn project<'py>(&self, m: &Morpheme>, py: Python<'py>) -> Bound<'py, PyString> { - PyString::new_bound(py, m.surface().deref()) + PyString::new(py, m.surface().deref()) } } @@ -46,7 +46,7 @@ struct Mapped Fn(&'a Morpheme<'a, Arc>) -> &'a str> { impl Fn(&'a Morpheme<'a, Arc>) -> &'a str> MorphemeProjection for Mapped { fn project<'py>(&self, m: &Morpheme>, py: Python<'py>) -> Bound<'py, PyString> { - PyString::new_bound(py, (self.func)(m)) + PyString::new(py, (self.func)(m)) } } @@ -64,9 +64,9 @@ impl DictionaryAndSurface { impl MorphemeProjection for DictionaryAndSurface { fn project<'py>(&self, m: &Morpheme>, py: Python<'py>) -> Bound<'py, PyString> { if self.matcher.matches_id(m.part_of_speech_id()) { - PyString::new_bound(py, m.surface().deref()) + PyString::new(py, m.surface().deref()) } else { - PyString::new_bound(py, m.dictionary_form()) + PyString::new(py, m.dictionary_form()) } } } @@ -85,9 +85,9 @@ impl NormalizedAndSurface { impl MorphemeProjection for NormalizedAndSurface { fn project<'py>(&self, m: &Morpheme>, py: Python<'py>) -> Bound<'py, PyString> { if self.matcher.matches_id(m.part_of_speech_id()) { - PyString::new_bound(py, m.surface().deref()) + PyString::new(py, m.surface().deref()) } else { - PyString::new_bound(py, m.normalized_form()) + PyString::new(py, m.normalized_form()) } } } @@ -106,9 +106,9 @@ impl NormalizedNouns { impl MorphemeProjection for NormalizedNouns { fn project<'py>(&self, m: &Morpheme>, py: Python<'py>) -> Bound<'py, PyString> { if self.matcher.matches_id(m.part_of_speech_id()) { - PyString::new_bound(py, m.normalized_form()) + PyString::new(py, m.normalized_form()) } else { - PyString::new_bound(py, m.surface().deref()) + PyString::new(py, m.surface().deref()) } } } From 97aec8131f9f12c688b7a22f832154907832d01c Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Tue, 3 Dec 2024 17:49:53 +0900 Subject: [PATCH 3/3] update manylinux-pgo for py3.13t --- python/build-wheels-manylinux-pgo.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/build-wheels-manylinux-pgo.sh b/python/build-wheels-manylinux-pgo.sh index da0d12d8..7584e51b 100644 --- a/python/build-wheels-manylinux-pgo.sh +++ b/python/build-wheels-manylinux-pgo.sh @@ -35,8 +35,7 @@ export CARGO_BUILD_TARGET=x86_64-unknown-linux-gnu # see following link for the list of cpython bin # https://github.com/pypa/manylinux?tab=readme-ov-file#image-content -# TODO: after supporting py313t, "/opt/python/cp{37,38,39,310,311,312,313}-*/bin" would suffice. -for PYBIN in /opt/python/cp*-cp{37m,38,39,310,311,312,313}/bin; do +for PYBIN in /opt/python/cp{37,38,39,310,311,312,313}-*/bin; do "${PYBIN}/pip" install -U setuptools wheel setuptools-rust find . -iname 'sudachipy*.so' rm -f build/lib/sudachipy/sudachipy*.so