diff --git a/internal/core/thirdparty/tantivy/rust-array.h b/internal/core/thirdparty/tantivy/rust-array.h index ba9baecc1f038..3c687f735b640 100644 --- a/internal/core/thirdparty/tantivy/rust-array.h +++ b/internal/core/thirdparty/tantivy/rust-array.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include "tantivy-binding.h" @@ -11,7 +12,13 @@ namespace milvus::tantivy { struct RustArrayWrapper { NO_COPY_OR_ASSIGN(RustArrayWrapper); - explicit RustArrayWrapper(RustArray array) : array_(array) { + explicit RustArrayWrapper(RustArray&& array) { + array_.array = array.array; + array_.len = array.len; + array_.cap = array.cap; + array.array = nullptr; + array.len = 0; + array.cap = 0; } RustArrayWrapper(RustArrayWrapper&& other) noexcept { @@ -62,4 +69,42 @@ struct RustArrayWrapper { } } }; +struct RustResultWrapper { + NO_COPY_OR_ASSIGN(RustResultWrapper); + + RustResultWrapper() = default; + explicit RustResultWrapper(RustResult result) + : result_(std::make_unique(result)) { + } + + RustResultWrapper(RustResultWrapper&& other) noexcept { + result_ = std::move(other.result_); + } + + RustResultWrapper& + operator=(RustResultWrapper&& other) noexcept { + if (this != &other) { + free(); + result_ = std::move(other.result_); + } + + return *this; + } + + ~RustResultWrapper() { + free(); + } + + std::unique_ptr result_; + + private: + void + free() { + if (result_) { + free_rust_result(*result_); + result_.reset(); + } + } +}; + } // namespace milvus::tantivy diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/include/tantivy-binding.h b/internal/core/thirdparty/tantivy/tantivy-binding/include/tantivy-binding.h index 0e86a40c1b743..61a2088a2d26f 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/include/tantivy-binding.h +++ b/internal/core/thirdparty/tantivy/tantivy-binding/include/tantivy-binding.h @@ -20,10 +20,59 @@ struct RustArray { size_t cap; }; +struct Value { + enum class Tag { + None, + RustArray, + U32, + Ptr, + }; + + struct 
None_Body { + + }; + + struct RustArray_Body { + RustArray _0; + }; + + struct U32_Body { + uint32_t _0; + }; + + struct Ptr_Body { + void *_0; + }; + + Tag tag; + union { + None_Body none; + RustArray_Body rust_array; + U32_Body u32; + Ptr_Body ptr; + }; +}; + +struct RustResult { + bool success; + Value value; + const char *error; +}; + extern "C" { void free_rust_array(RustArray array); +void free_rust_result(RustResult result); + +void free_rust_error(const char *error); + +RustResult test_enum_with_array(); + +RustResult test_enum_with_ptr(); + +void free_test_ptr(void *ptr); + void print_vector_of_strings(const char *const *ptr, uintptr_t len); void *create_hashmap(); @@ -32,120 +81,164 @@ void hashmap_set_value(void *map, const char *key, const char *value); void free_hashmap(void *map); -void *tantivy_load_index(const char *path); +RustResult tantivy_load_index(const char *path); void tantivy_free_index_reader(void *ptr); -void tantivy_reload_index(void *ptr); +RustResult tantivy_reload_index(void *ptr); -uint32_t tantivy_index_count(void *ptr); +RustResult tantivy_index_count(void *ptr); -RustArray tantivy_term_query_i64(void *ptr, int64_t term); +RustResult tantivy_term_query_i64(void *ptr, int64_t term); -RustArray tantivy_lower_bound_range_query_i64(void *ptr, int64_t lower_bound, bool inclusive); +RustResult tantivy_lower_bound_range_query_i64(void *ptr, int64_t lower_bound, bool inclusive); -RustArray tantivy_upper_bound_range_query_i64(void *ptr, int64_t upper_bound, bool inclusive); +RustResult tantivy_upper_bound_range_query_i64(void *ptr, int64_t upper_bound, bool inclusive); -RustArray tantivy_range_query_i64(void *ptr, - int64_t lower_bound, - int64_t upper_bound, - bool lb_inclusive, - bool ub_inclusive); +RustResult tantivy_range_query_i64(void *ptr, + int64_t lower_bound, + int64_t upper_bound, + bool lb_inclusive, + bool ub_inclusive); -RustArray tantivy_term_query_f64(void *ptr, double term); +RustResult tantivy_term_query_f64(void *ptr, 
double term); -RustArray tantivy_lower_bound_range_query_f64(void *ptr, double lower_bound, bool inclusive); +RustResult tantivy_lower_bound_range_query_f64(void *ptr, double lower_bound, bool inclusive); -RustArray tantivy_upper_bound_range_query_f64(void *ptr, double upper_bound, bool inclusive); +RustResult tantivy_upper_bound_range_query_f64(void *ptr, double upper_bound, bool inclusive); -RustArray tantivy_range_query_f64(void *ptr, - double lower_bound, - double upper_bound, - bool lb_inclusive, - bool ub_inclusive); +RustResult tantivy_range_query_f64(void *ptr, + double lower_bound, + double upper_bound, + bool lb_inclusive, + bool ub_inclusive); -RustArray tantivy_term_query_bool(void *ptr, bool term); +RustResult tantivy_term_query_bool(void *ptr, bool term); -RustArray tantivy_term_query_keyword(void *ptr, const char *term); +RustResult tantivy_term_query_keyword(void *ptr, const char *term); -RustArray tantivy_lower_bound_range_query_keyword(void *ptr, - const char *lower_bound, - bool inclusive); +RustResult tantivy_lower_bound_range_query_keyword(void *ptr, + const char *lower_bound, + bool inclusive); -RustArray tantivy_upper_bound_range_query_keyword(void *ptr, - const char *upper_bound, - bool inclusive); +RustResult tantivy_upper_bound_range_query_keyword(void *ptr, + const char *upper_bound, + bool inclusive); -RustArray tantivy_range_query_keyword(void *ptr, - const char *lower_bound, - const char *upper_bound, - bool lb_inclusive, - bool ub_inclusive); +RustResult tantivy_range_query_keyword(void *ptr, + const char *lower_bound, + const char *upper_bound, + bool lb_inclusive, + bool ub_inclusive); -RustArray tantivy_prefix_query_keyword(void *ptr, const char *prefix); +RustResult tantivy_prefix_query_keyword(void *ptr, const char *prefix); -RustArray tantivy_regex_query(void *ptr, const char *pattern); +RustResult tantivy_regex_query(void *ptr, const char *pattern); -RustArray tantivy_match_query(void *ptr, const char *query); +RustResult 
tantivy_match_query(void *ptr, const char *query); -void tantivy_register_tokenizer(void *ptr, const char *tokenizer_name, const char *analyzer_params); +RustResult tantivy_register_tokenizer(void *ptr, + const char *tokenizer_name, + const char *analyzer_params); -void *tantivy_create_index(const char *field_name, - TantivyDataType data_type, - const char *path, - uintptr_t num_threads, - uintptr_t overall_memory_budget_in_bytes); +RustResult tantivy_create_index(const char *field_name, + TantivyDataType data_type, + const char *path, + uintptr_t num_threads, + uintptr_t overall_memory_budget_in_bytes); void tantivy_free_index_writer(void *ptr); -void tantivy_finish_index(void *ptr); - -void tantivy_commit_index(void *ptr); - -void *tantivy_create_reader_from_writer(void *ptr); - -void tantivy_index_add_int8s(void *ptr, const int8_t *array, uintptr_t len, int64_t offset_begin); - -void tantivy_index_add_int16s(void *ptr, const int16_t *array, uintptr_t len, int64_t offset_begin); - -void tantivy_index_add_int32s(void *ptr, const int32_t *array, uintptr_t len, int64_t offset_begin); - -void tantivy_index_add_int64s(void *ptr, const int64_t *array, uintptr_t len, int64_t offset_begin); - -void tantivy_index_add_f32s(void *ptr, const float *array, uintptr_t len, int64_t offset_begin); - -void tantivy_index_add_f64s(void *ptr, const double *array, uintptr_t len, int64_t offset_begin); - -void tantivy_index_add_bools(void *ptr, const bool *array, uintptr_t len, int64_t offset_begin); - -void tantivy_index_add_string(void *ptr, const char *s, int64_t offset); - -void tantivy_index_add_multi_int8s(void *ptr, const int8_t *array, uintptr_t len, int64_t offset); - -void tantivy_index_add_multi_int16s(void *ptr, const int16_t *array, uintptr_t len, int64_t offset); - -void tantivy_index_add_multi_int32s(void *ptr, const int32_t *array, uintptr_t len, int64_t offset); - -void tantivy_index_add_multi_int64s(void *ptr, const int64_t *array, uintptr_t len, int64_t offset); - 
-void tantivy_index_add_multi_f32s(void *ptr, const float *array, uintptr_t len, int64_t offset); - -void tantivy_index_add_multi_f64s(void *ptr, const double *array, uintptr_t len, int64_t offset); - -void tantivy_index_add_multi_bools(void *ptr, const bool *array, uintptr_t len, int64_t offset); - -void tantivy_index_add_multi_keywords(void *ptr, - const char *const *array, - uintptr_t len, - int64_t offset); - -void *tantivy_create_text_writer(const char *field_name, - const char *path, - const char *tokenizer_name, - const char *analyzer_params, - uintptr_t num_threads, - uintptr_t overall_memory_budget_in_bytes, - bool in_ram); +RustResult tantivy_finish_index(void *ptr); + +RustResult tantivy_commit_index(void *ptr); + +RustResult tantivy_create_reader_from_writer(void *ptr); + +RustResult tantivy_index_add_int8s(void *ptr, + const int8_t *array, + uintptr_t len, + int64_t offset_begin); + +RustResult tantivy_index_add_int16s(void *ptr, + const int16_t *array, + uintptr_t len, + int64_t offset_begin); + +RustResult tantivy_index_add_int32s(void *ptr, + const int32_t *array, + uintptr_t len, + int64_t offset_begin); + +RustResult tantivy_index_add_int64s(void *ptr, + const int64_t *array, + uintptr_t len, + int64_t offset_begin); + +RustResult tantivy_index_add_f32s(void *ptr, + const float *array, + uintptr_t len, + int64_t offset_begin); + +RustResult tantivy_index_add_f64s(void *ptr, + const double *array, + uintptr_t len, + int64_t offset_begin); + +RustResult tantivy_index_add_bools(void *ptr, + const bool *array, + uintptr_t len, + int64_t offset_begin); + +RustResult tantivy_index_add_string(void *ptr, const char *s, int64_t offset); + +RustResult tantivy_index_add_multi_int8s(void *ptr, + const int8_t *array, + uintptr_t len, + int64_t offset); + +RustResult tantivy_index_add_multi_int16s(void *ptr, + const int16_t *array, + uintptr_t len, + int64_t offset); + +RustResult tantivy_index_add_multi_int32s(void *ptr, + const int32_t *array, + uintptr_t 
len, + int64_t offset); + +RustResult tantivy_index_add_multi_int64s(void *ptr, + const int64_t *array, + uintptr_t len, + int64_t offset); + +RustResult tantivy_index_add_multi_f32s(void *ptr, + const float *array, + uintptr_t len, + int64_t offset); + +RustResult tantivy_index_add_multi_f64s(void *ptr, + const double *array, + uintptr_t len, + int64_t offset); + +RustResult tantivy_index_add_multi_bools(void *ptr, + const bool *array, + uintptr_t len, + int64_t offset); + +RustResult tantivy_index_add_multi_keywords(void *ptr, + const char *const *array, + uintptr_t len, + int64_t offset); + +RustResult tantivy_create_text_writer(const char *field_name, + const char *path, + const char *tokenizer_name, + const char *analyzer_params, + uintptr_t num_threads, + uintptr_t overall_memory_budget_in_bytes, + bool in_ram); void free_rust_string(const char *ptr); @@ -157,7 +250,7 @@ bool tantivy_token_stream_advance(void *token_stream); const char *tantivy_token_stream_get_token(void *token_stream); -void *tantivy_create_tokenizer(const char *analyzer_params); +RustResult tantivy_create_tokenizer(const char *analyzer_params); void *tantivy_clone_tokenizer(void *ptr); diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/array.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/array.rs index 9d71ffa315b05..8584907a38308 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/array.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/array.rs @@ -1,5 +1,16 @@ +use std::default; +use std::ffi::c_void; +use std::ptr::null; + +use libc::c_char; use libc::size_t; +use crate::error; +use crate::error::Result; +use crate::string_c::create_string; +use crate::string_c::free_rust_string; +use crate::util::free_binding; + #[repr(C)] pub struct RustArray { array: *mut u32, @@ -20,6 +31,22 @@ impl RustArray { } } +impl std::default::Default for RustArray { + fn default() -> Self { + RustArray { + array: std::ptr::null_mut(), + len: 0, + cap: 0, + 
} + } +} + +impl From> for RustArray { + fn from(vec: Vec) -> Self { + RustArray::from_vec(vec) + } +} + #[no_mangle] pub extern "C" fn free_rust_array(array: RustArray) { let RustArray { array, len, cap } = array; @@ -27,3 +54,129 @@ pub extern "C" fn free_rust_array(array: RustArray) { Vec::from_raw_parts(array, len, cap); } } + +#[repr(C)] +pub enum Value { + None(()), + RustArray(RustArray), + U32(u32), + Ptr(*mut c_void), +} + +macro_rules! impl_from_for_enum { + ($enum_name:ident, $($variant:ident => $type:ty),*) => { + $( + impl From<$type> for $enum_name { + fn from(value: $type) -> Self { + $enum_name::$variant(value.into()) + } + } + )* + }; +} + +impl_from_for_enum!(Value, None => (), RustArray => RustArray, RustArray => Vec, U32 => u32, Ptr => *mut c_void); + +#[repr(C)] +pub struct RustResult { + pub success: bool, + pub value: Value, + pub error: *const c_char, +} + +impl RustResult { + pub fn from_ptr(value: *mut c_void) -> Self { + RustResult { + success: true, + value: Value::Ptr(value), + error: std::ptr::null(), + } + } + + pub fn from_error(error: String) -> Self { + RustResult { + success: false, + value: Value::None(()), + error: create_string(&error), + } + } +} + +impl From> for RustResult +where + T: Into, +{ + fn from(value: error::Result) -> Self { + match value { + Ok(v) => RustResult { + success: true, + value: v.into(), + error: null(), + }, + Err(e) => RustResult { + success: false, + value: Value::None(()), + error: create_string(&e.to_string()), + }, + } + } +} + +#[no_mangle] +pub extern "C" fn free_rust_result(result: RustResult) { + match result.value { + Value::RustArray(array) => { + if !array.array.is_null() { + free_rust_array(array); + } + } + _ => {} + } + unsafe { + if !result.error.is_null() { + free_rust_string(result.error as *mut c_char); + } + } +} + +#[no_mangle] +pub extern "C" fn free_rust_error(error: *const c_char) { + unsafe { + if !error.is_null() { + free_rust_string(error as *mut c_char); + } + } +} + +// 
TODO: move to common +#[macro_export] +macro_rules! cstr_to_str { + ($cstr:expr) => { + unsafe { + match CStr::from_ptr($cstr).to_str() { + Ok(f) => f, + Err(e) => return RustResult::from_error(e.to_string()), + } + } + }; +} + +#[no_mangle] +pub extern "C" fn test_enum_with_array() -> RustResult { + let array = vec![1, 2, 3]; + RustResult::from(Result::Ok(array)) +} + +#[no_mangle] +pub extern "C" fn test_enum_with_ptr() -> RustResult { + let ptr = Box::into_raw(Box::new(1 as u32)); + RustResult::from(Result::Ok(ptr as *mut c_void)) +} + +#[no_mangle] +pub extern "C" fn free_test_ptr(ptr: *mut c_void) { + if ptr.is_null() { + return; + } + free_binding::(ptr); +} diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/error.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/error.rs index ead116d1ac8b8..0bd2ddc56ab24 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/error.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/error.rs @@ -1,10 +1,12 @@ -use core::fmt; +use core::{fmt, str}; use serde_json as json; #[derive(Debug)] pub enum TantivyBindingError { JsonError(serde_json::Error), + TantivyError(tantivy::TantivyError), + InvalidArgument(String), InternalError(String), } @@ -14,10 +16,18 @@ impl From for TantivyBindingError { } } +impl From for TantivyBindingError { + fn from(value: tantivy::TantivyError) -> Self { + TantivyBindingError::TantivyError(value) + } +} + impl fmt::Display for TantivyBindingError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { TantivyBindingError::JsonError(e) => write!(f, "JsonError: {}", e), + TantivyBindingError::TantivyError(e) => write!(f, "TantivyError: {}", e), + TantivyBindingError::InvalidArgument(e) => write!(f, "InvalidArgument: {}", e), TantivyBindingError::InternalError(e) => write!(f, "InternalError: {}", e), } } @@ -27,9 +37,17 @@ impl std::error::Error for TantivyBindingError { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { match 
self { TantivyBindingError::JsonError(e) => Some(e), + TantivyBindingError::TantivyError(e) => Some(e), + TantivyBindingError::InvalidArgument(_) => None, TantivyBindingError::InternalError(_) => None, } } } +impl From for TantivyBindingError { + fn from(value: str::Utf8Error) -> Self { + TantivyBindingError::InternalError(value.to_string()) + } +} + pub type Result = std::result::Result; diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_reader.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_reader.rs index 1659c5a3156e1..cb5f989070d10 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_reader.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_reader.rs @@ -10,6 +10,8 @@ use crate::log::init_log; use crate::util::make_bounds; use crate::vec_collector::VecCollector; +use crate::error::{Result, TantivyBindingError}; + pub(crate) struct IndexReaderWrapper { pub(crate) field_name: String, pub(crate) field: Field, @@ -19,15 +21,15 @@ pub(crate) struct IndexReaderWrapper { } impl IndexReaderWrapper { - pub fn load(path: &str) -> IndexReaderWrapper { + pub fn load(path: &str) -> Result { init_log(); - let index = Index::open_in_dir(path).unwrap(); + let index = Index::open_in_dir(path)?; IndexReaderWrapper::from_index(Arc::new(index)) } - pub fn from_index(index: Arc) -> IndexReaderWrapper { + pub fn from_index(index: Arc) -> Result { let field = index.schema().fields().next().unwrap().0; let schema = index.schema(); let field_name = String::from(schema.get_field_name(field)); @@ -39,47 +41,51 @@ impl IndexReaderWrapper { let reader = index .reader_builder() .reload_policy(ReloadPolicy::OnCommit) // OnCommit serve for growing segment. 
- .try_into() - .unwrap(); - reader.reload().unwrap(); + .try_into()?; + reader.reload()?; - IndexReaderWrapper { + Ok(IndexReaderWrapper { field_name, field, reader, index, id_field, - } + }) } - pub fn reload(&self) { - self.reader.reload().unwrap(); + pub fn reload(&self) -> Result<()> { + self.reader.reload()?; + Ok(()) } - pub fn count(&self) -> u32 { - let metas = self.index.searchable_segment_metas().unwrap(); + pub fn count(&self) -> Result { + let metas = self.index.searchable_segment_metas()?; let mut sum: u32 = 0; for meta in metas { sum += meta.max_doc(); } - sum + Ok(sum) } - pub(crate) fn search(&self, q: &dyn Query) -> Vec { + pub(crate) fn search(&self, q: &dyn Query) -> Result> { let searcher = self.reader.searcher(); match self.id_field { Some(_) => { // newer version with doc_id. - searcher.search(q, &DocIdCollector {}).unwrap() + searcher + .search(q, &DocIdCollector {}) + .map_err(TantivyBindingError::TantivyError) } None => { // older version without doc_id, only one segment. 
- searcher.search(q, &VecCollector {}).unwrap() + searcher + .search(q, &VecCollector {}) + .map_err(TantivyBindingError::TantivyError) } } } - pub fn term_query_i64(&self, term: i64) -> Vec { + pub fn term_query_i64(&self, term: i64) -> Result> { let q = TermQuery::new( Term::from_field_i64(self.field, term), IndexRecordOption::Basic, @@ -87,7 +93,11 @@ impl IndexReaderWrapper { self.search(&q) } - pub fn lower_bound_range_query_i64(&self, lower_bound: i64, inclusive: bool) -> Vec { + pub fn lower_bound_range_query_i64( + &self, + lower_bound: i64, + inclusive: bool, + ) -> Result> { let q = RangeQuery::new_i64_bounds( self.field_name.to_string(), make_bounds(lower_bound, inclusive), @@ -96,7 +106,11 @@ impl IndexReaderWrapper { self.search(&q) } - pub fn upper_bound_range_query_i64(&self, upper_bound: i64, inclusive: bool) -> Vec { + pub fn upper_bound_range_query_i64( + &self, + upper_bound: i64, + inclusive: bool, + ) -> Result> { let q = RangeQuery::new_i64_bounds( self.field_name.to_string(), Bound::Unbounded, @@ -111,14 +125,14 @@ impl IndexReaderWrapper { upper_bound: i64, lb_inclusive: bool, ub_inclusive: bool, - ) -> Vec { + ) -> Result> { let lb = make_bounds(lower_bound, lb_inclusive); let ub = make_bounds(upper_bound, ub_inclusive); let q = RangeQuery::new_i64_bounds(self.field_name.to_string(), lb, ub); self.search(&q) } - pub fn term_query_f64(&self, term: f64) -> Vec { + pub fn term_query_f64(&self, term: f64) -> Result> { let q = TermQuery::new( Term::from_field_f64(self.field, term), IndexRecordOption::Basic, @@ -126,7 +140,11 @@ impl IndexReaderWrapper { self.search(&q) } - pub fn lower_bound_range_query_f64(&self, lower_bound: f64, inclusive: bool) -> Vec { + pub fn lower_bound_range_query_f64( + &self, + lower_bound: f64, + inclusive: bool, + ) -> Result> { let q = RangeQuery::new_f64_bounds( self.field_name.to_string(), make_bounds(lower_bound, inclusive), @@ -135,7 +153,11 @@ impl IndexReaderWrapper { self.search(&q) } - pub fn 
upper_bound_range_query_f64(&self, upper_bound: f64, inclusive: bool) -> Vec { + pub fn upper_bound_range_query_f64( + &self, + upper_bound: f64, + inclusive: bool, + ) -> Result> { let q = RangeQuery::new_f64_bounds( self.field_name.to_string(), Bound::Unbounded, @@ -150,14 +172,14 @@ impl IndexReaderWrapper { upper_bound: f64, lb_inclusive: bool, ub_inclusive: bool, - ) -> Vec { + ) -> Result> { let lb = make_bounds(lower_bound, lb_inclusive); let ub = make_bounds(upper_bound, ub_inclusive); let q = RangeQuery::new_f64_bounds(self.field_name.to_string(), lb, ub); self.search(&q) } - pub fn term_query_bool(&self, term: bool) -> Vec { + pub fn term_query_bool(&self, term: bool) -> Result> { let q = TermQuery::new( Term::from_field_bool(self.field, term), IndexRecordOption::Basic, @@ -165,7 +187,7 @@ impl IndexReaderWrapper { self.search(&q) } - pub fn term_query_keyword(&self, term: &str) -> Vec { + pub fn term_query_keyword(&self, term: &str) -> Result> { let q = TermQuery::new( Term::from_field_text(self.field, term), IndexRecordOption::Basic, @@ -173,7 +195,11 @@ impl IndexReaderWrapper { self.search(&q) } - pub fn lower_bound_range_query_keyword(&self, lower_bound: &str, inclusive: bool) -> Vec { + pub fn lower_bound_range_query_keyword( + &self, + lower_bound: &str, + inclusive: bool, + ) -> Result> { let q = RangeQuery::new_str_bounds( self.field_name.to_string(), make_bounds(lower_bound, inclusive), @@ -182,7 +208,11 @@ impl IndexReaderWrapper { self.search(&q) } - pub fn upper_bound_range_query_keyword(&self, upper_bound: &str, inclusive: bool) -> Vec { + pub fn upper_bound_range_query_keyword( + &self, + upper_bound: &str, + inclusive: bool, + ) -> Result> { let q = RangeQuery::new_str_bounds( self.field_name.to_string(), Bound::Unbounded, @@ -197,21 +227,21 @@ impl IndexReaderWrapper { upper_bound: &str, lb_inclusive: bool, ub_inclusive: bool, - ) -> Vec { + ) -> Result> { let lb = make_bounds(lower_bound, lb_inclusive); let ub = make_bounds(upper_bound, 
ub_inclusive); let q = RangeQuery::new_str_bounds(self.field_name.to_string(), lb, ub); self.search(&q) } - pub fn prefix_query_keyword(&self, prefix: &str) -> Vec { + pub fn prefix_query_keyword(&self, prefix: &str) -> Result> { let escaped = regex::escape(prefix); let pattern = format!("{}(.|\n)*", escaped); self.regex_query(&pattern) } - pub fn regex_query(&self, pattern: &str) -> Vec { - let q = RegexQuery::from_pattern(&pattern, self.field).unwrap(); + pub fn regex_query(&self, pattern: &str) -> Result> { + let q = RegexQuery::from_pattern(&pattern, self.field)?; self.search(&q) } } @@ -244,10 +274,10 @@ mod test { index_writer.commit().unwrap(); let index_shared = Arc::new(index); - let index_reader_wrapper = IndexReaderWrapper::from_index(index_shared); - let mut res = index_reader_wrapper.prefix_query_keyword("^"); + let index_reader_wrapper = IndexReaderWrapper::from_index(index_shared).unwrap(); + let mut res = index_reader_wrapper.prefix_query_keyword("^").unwrap(); assert_eq!(res.len(), 1); - res = index_reader_wrapper.prefix_query_keyword("$"); + res = index_reader_wrapper.prefix_query_keyword("$").unwrap(); assert_eq!(res.len(), 1); } } diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_reader_c.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_reader_c.rs index 60e61360d87a5..1adedbd49cf28 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_reader_c.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_reader_c.rs @@ -1,18 +1,25 @@ -use std::ffi::{c_char, c_void, CStr}; +use std::{ + ffi::{c_char, c_void, CStr}, + ptr::null, +}; use crate::{ - array::RustArray, + array::{RustArray, RustResult}, + cstr_to_str, index_reader::IndexReaderWrapper, + string_c::create_string, util::{create_binding, free_binding}, util_c::tantivy_index_exist, }; #[no_mangle] -pub extern "C" fn tantivy_load_index(path: *const c_char) -> *mut c_void { +pub extern "C" fn tantivy_load_index(path: *const c_char) 
-> RustResult { assert!(tantivy_index_exist(path)); - let path_str = unsafe { CStr::from_ptr(path) }; - let wrapper = IndexReaderWrapper::load(path_str.to_str().unwrap()); - create_binding(wrapper) + let path_str = cstr_to_str!(path); + match IndexReaderWrapper::load(path_str) { + Ok(w) => RustResult::from_ptr(create_binding(w)), + Err(e) => RustResult::from_error(e.to_string()), + } } #[no_mangle] @@ -22,26 +29,21 @@ pub extern "C" fn tantivy_free_index_reader(ptr: *mut c_void) { // -------------------------query-------------------- #[no_mangle] -pub extern "C" fn tantivy_reload_index(ptr: *mut c_void) { +pub extern "C" fn tantivy_reload_index(ptr: *mut c_void) -> RustResult { let real = ptr as *mut IndexReaderWrapper; - unsafe { - (*real).reload(); - } + unsafe { (*real).reload().into() } } #[no_mangle] -pub extern "C" fn tantivy_index_count(ptr: *mut c_void) -> u32 { +pub extern "C" fn tantivy_index_count(ptr: *mut c_void) -> RustResult { let real = ptr as *mut IndexReaderWrapper; - unsafe { (*real).count() } + unsafe { (*real).count().into() } } #[no_mangle] -pub extern "C" fn tantivy_term_query_i64(ptr: *mut c_void, term: i64) -> RustArray { +pub extern "C" fn tantivy_term_query_i64(ptr: *mut c_void, term: i64) -> RustResult { let real = ptr as *mut IndexReaderWrapper; - unsafe { - let hits = (*real).term_query_i64(term); - RustArray::from_vec(hits) - } + unsafe { (*real).term_query_i64(term).into() } } #[no_mangle] @@ -49,11 +51,12 @@ pub extern "C" fn tantivy_lower_bound_range_query_i64( ptr: *mut c_void, lower_bound: i64, inclusive: bool, -) -> RustArray { +) -> RustResult { let real = ptr as *mut IndexReaderWrapper; unsafe { - let hits = (*real).lower_bound_range_query_i64(lower_bound, inclusive); - RustArray::from_vec(hits) + (*real) + .lower_bound_range_query_i64(lower_bound, inclusive) + .into() } } @@ -62,11 +65,12 @@ pub extern "C" fn tantivy_upper_bound_range_query_i64( ptr: *mut c_void, upper_bound: i64, inclusive: bool, -) -> RustArray { +) -> 
RustResult { let real = ptr as *mut IndexReaderWrapper; unsafe { - let hits = (*real).upper_bound_range_query_i64(upper_bound, inclusive); - RustArray::from_vec(hits) + (*real) + .upper_bound_range_query_i64(upper_bound, inclusive) + .into() } } @@ -77,21 +81,19 @@ pub extern "C" fn tantivy_range_query_i64( upper_bound: i64, lb_inclusive: bool, ub_inclusive: bool, -) -> RustArray { +) -> RustResult { let real = ptr as *mut IndexReaderWrapper; unsafe { - let hits = (*real).range_query_i64(lower_bound, upper_bound, lb_inclusive, ub_inclusive); - RustArray::from_vec(hits) + (*real) + .range_query_i64(lower_bound, upper_bound, lb_inclusive, ub_inclusive) + .into() } } #[no_mangle] -pub extern "C" fn tantivy_term_query_f64(ptr: *mut c_void, term: f64) -> RustArray { +pub extern "C" fn tantivy_term_query_f64(ptr: *mut c_void, term: f64) -> RustResult { let real = ptr as *mut IndexReaderWrapper; - unsafe { - let hits = (*real).term_query_f64(term); - RustArray::from_vec(hits) - } + unsafe { (*real).term_query_f64(term).into() } } #[no_mangle] @@ -99,11 +101,12 @@ pub extern "C" fn tantivy_lower_bound_range_query_f64( ptr: *mut c_void, lower_bound: f64, inclusive: bool, -) -> RustArray { +) -> RustResult { let real = ptr as *mut IndexReaderWrapper; unsafe { - let hits = (*real).lower_bound_range_query_f64(lower_bound, inclusive); - RustArray::from_vec(hits) + (*real) + .lower_bound_range_query_f64(lower_bound, inclusive) + .into() } } @@ -112,11 +115,12 @@ pub extern "C" fn tantivy_upper_bound_range_query_f64( ptr: *mut c_void, upper_bound: f64, inclusive: bool, -) -> RustArray { +) -> RustResult { let real = ptr as *mut IndexReaderWrapper; unsafe { - let hits = (*real).upper_bound_range_query_f64(upper_bound, inclusive); - RustArray::from_vec(hits) + (*real) + .upper_bound_range_query_f64(upper_bound, inclusive) + .into() } } @@ -127,31 +131,26 @@ pub extern "C" fn tantivy_range_query_f64( upper_bound: f64, lb_inclusive: bool, ub_inclusive: bool, -) -> RustArray { +) -> 
RustResult { let real = ptr as *mut IndexReaderWrapper; unsafe { - let hits = (*real).range_query_f64(lower_bound, upper_bound, lb_inclusive, ub_inclusive); - RustArray::from_vec(hits) + (*real) + .range_query_f64(lower_bound, upper_bound, lb_inclusive, ub_inclusive) + .into() } } #[no_mangle] -pub extern "C" fn tantivy_term_query_bool(ptr: *mut c_void, term: bool) -> RustArray { +pub extern "C" fn tantivy_term_query_bool(ptr: *mut c_void, term: bool) -> RustResult { let real = ptr as *mut IndexReaderWrapper; - unsafe { - let hits = (*real).term_query_bool(term); - RustArray::from_vec(hits) - } + unsafe { (*real).term_query_bool(term).into() } } #[no_mangle] -pub extern "C" fn tantivy_term_query_keyword(ptr: *mut c_void, term: *const c_char) -> RustArray { +pub extern "C" fn tantivy_term_query_keyword(ptr: *mut c_void, term: *const c_char) -> RustResult { let real = ptr as *mut IndexReaderWrapper; - unsafe { - let c_str = CStr::from_ptr(term); - let hits = (*real).term_query_keyword(c_str.to_str().unwrap()); - RustArray::from_vec(hits) - } + let term = cstr_to_str!(term); + unsafe { (*real).term_query_keyword(term).into() } } #[no_mangle] @@ -159,13 +158,13 @@ pub extern "C" fn tantivy_lower_bound_range_query_keyword( ptr: *mut c_void, lower_bound: *const c_char, inclusive: bool, -) -> RustArray { +) -> RustResult { let real = ptr as *mut IndexReaderWrapper; + let lower_bound = cstr_to_str!(lower_bound); unsafe { - let c_lower_bound = CStr::from_ptr(lower_bound); - let hits = - (*real).lower_bound_range_query_keyword(c_lower_bound.to_str().unwrap(), inclusive); - RustArray::from_vec(hits) + (*real) + .lower_bound_range_query_keyword(lower_bound, inclusive) + .into() } } @@ -174,13 +173,13 @@ pub extern "C" fn tantivy_upper_bound_range_query_keyword( ptr: *mut c_void, upper_bound: *const c_char, inclusive: bool, -) -> RustArray { +) -> RustResult { let real = ptr as *mut IndexReaderWrapper; + let upper_bound = cstr_to_str!(upper_bound); unsafe { - let c_upper_bound 
= CStr::from_ptr(upper_bound); - let hits = - (*real).upper_bound_range_query_keyword(c_upper_bound.to_str().unwrap(), inclusive); - RustArray::from_vec(hits) + (*real) + .upper_bound_range_query_keyword(upper_bound, inclusive) + .into() } } @@ -191,18 +190,14 @@ pub extern "C" fn tantivy_range_query_keyword( upper_bound: *const c_char, lb_inclusive: bool, ub_inclusive: bool, -) -> RustArray { +) -> RustResult { let real = ptr as *mut IndexReaderWrapper; + let lower_bound = cstr_to_str!(lower_bound); + let upper_bound = cstr_to_str!(upper_bound); unsafe { - let c_lower_bound = CStr::from_ptr(lower_bound); - let c_upper_bound = CStr::from_ptr(upper_bound); - let hits = (*real).range_query_keyword( - c_lower_bound.to_str().unwrap(), - c_upper_bound.to_str().unwrap(), - lb_inclusive, - ub_inclusive, - ); - RustArray::from_vec(hits) + (*real) + .range_query_keyword(lower_bound, upper_bound, lb_inclusive, ub_inclusive) + .into() } } @@ -210,21 +205,15 @@ pub extern "C" fn tantivy_range_query_keyword( pub extern "C" fn tantivy_prefix_query_keyword( ptr: *mut c_void, prefix: *const c_char, -) -> RustArray { +) -> RustResult { let real = ptr as *mut IndexReaderWrapper; - unsafe { - let c_str = CStr::from_ptr(prefix); - let hits = (*real).prefix_query_keyword(c_str.to_str().unwrap()); - RustArray::from_vec(hits) - } + let prefix = cstr_to_str!(prefix); + unsafe { (*real).prefix_query_keyword(prefix).into() } } #[no_mangle] -pub extern "C" fn tantivy_regex_query(ptr: *mut c_void, pattern: *const c_char) -> RustArray { +pub extern "C" fn tantivy_regex_query(ptr: *mut c_void, pattern: *const c_char) -> RustResult { let real = ptr as *mut IndexReaderWrapper; - unsafe { - let c_str = CStr::from_ptr(pattern); - let hits = (*real).regex_query(c_str.to_str().unwrap()); - RustArray::from_vec(hits) - } + let pattern = cstr_to_str!(pattern); + unsafe { (*real).regex_query(pattern).into() } } diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_reader_text.rs 
b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_reader_text.rs index ef6e2d6cb6552..f83df709c4098 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_reader_text.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_reader_text.rs @@ -4,12 +4,13 @@ use tantivy::{ Term, }; +use crate::error::Result; use crate::{index_reader::IndexReaderWrapper, tokenizer::standard_analyzer}; impl IndexReaderWrapper { // split the query string into multiple tokens using index's default tokenizer, // and then execute the disconjunction of term query. - pub(crate) fn match_query(&self, q: &str) -> Vec { + pub(crate) fn match_query(&self, q: &str) -> Result> { // clone the tokenizer to make `match_query` thread-safe. let mut tokenizer = self .index diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_reader_text_c.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_reader_text_c.rs index d245ac903cedd..a7f6f12ef56d1 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_reader_text_c.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_reader_text_c.rs @@ -1,22 +1,22 @@ -use std::{ffi::CStr}; +use std::{ffi::CStr, ptr::null}; use libc::{c_char, c_void}; use crate::{ - array::RustArray, - string_c::c_str_to_str, + array::{RustArray, RustResult}, + cstr_to_str, index_reader::IndexReaderWrapper, - tokenizer::create_tokenizer, log::init_log, + string_c::{c_str_to_str, create_string}, + tokenizer::create_tokenizer, }; #[no_mangle] -pub extern "C" fn tantivy_match_query(ptr: *mut c_void, query: *const c_char) -> RustArray { +pub extern "C" fn tantivy_match_query(ptr: *mut c_void, query: *const c_char) -> RustResult { let real = ptr as *mut IndexReaderWrapper; unsafe { - let c_str = CStr::from_ptr(query); - let hits = (*real).match_query(c_str.to_str().unwrap()); - RustArray::from_vec(hits) + let query = cstr_to_str!(query); + (*real).match_query(query).into() } } @@ -25,21 +25,17 @@ 
pub extern "C" fn tantivy_register_tokenizer( ptr: *mut c_void, tokenizer_name: *const c_char, analyzer_params: *const c_char, -) { +) -> RustResult { init_log(); let real = ptr as *mut IndexReaderWrapper; - let tokenizer_name_str = unsafe { CStr::from_ptr(tokenizer_name) }; - let params = unsafe{c_str_to_str(analyzer_params).to_string()}; - let analyzer = create_tokenizer(&params); + let tokenizer_name = cstr_to_str!(tokenizer_name); + let params = cstr_to_str!(analyzer_params); + let analyzer = create_tokenizer(params); match analyzer { Ok(text_analyzer) => unsafe { - (*real).register_tokenizer( - String::from(tokenizer_name_str.to_str().unwrap()), - text_analyzer, - ); - }, - Err(err) => { - panic!("create tokenizer failed with error: {} param: {}", err.to_string(), params); + (*real).register_tokenizer(String::from(tokenizer_name), text_analyzer); + Ok(()).into() }, + Err(err) => RustResult::from_error(err.to_string()), } } diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer.rs index a1a27da05cd25..8d838b015d5fb 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer.rs @@ -10,6 +10,7 @@ use tantivy::{doc, tokenizer, Document, Index, IndexWriter}; use crate::data_type::TantivyDataType; +use crate::error::Result; use crate::index_reader::IndexReaderWrapper; use crate::log::init_log; @@ -27,7 +28,7 @@ impl IndexWriterWrapper { path: String, num_threads: usize, overall_memory_budget_in_bytes: usize, - ) -> IndexWriterWrapper { + ) -> Result { init_log(); let field: Field; @@ -55,171 +56,170 @@ impl IndexWriterWrapper { } let id_field = schema_builder.add_i64_field("doc_id", FAST); let schema = schema_builder.build(); - let index = Index::create_in_dir(path.clone(), schema).unwrap(); - let index_writer = index - .writer_with_num_threads(num_threads, 
overall_memory_budget_in_bytes) - .unwrap(); - IndexWriterWrapper { + let index = Index::create_in_dir(path.clone(), schema)?; + let index_writer = + index.writer_with_num_threads(num_threads, overall_memory_budget_in_bytes)?; + Ok(IndexWriterWrapper { field, index_writer, id_field, index: Arc::new(index), - } + }) } - pub fn create_reader(&self) -> IndexReaderWrapper { + pub fn create_reader(&self) -> Result { IndexReaderWrapper::from_index(self.index.clone()) } - pub fn add_i8(&mut self, data: i8, offset: i64) { + pub fn add_i8(&mut self, data: i8, offset: i64) -> Result<()> { self.add_i64(data.into(), offset) } - pub fn add_i16(&mut self, data: i16, offset: i64) { + pub fn add_i16(&mut self, data: i16, offset: i64) -> Result<()> { self.add_i64(data.into(), offset) } - pub fn add_i32(&mut self, data: i32, offset: i64) { + pub fn add_i32(&mut self, data: i32, offset: i64) -> Result<()> { self.add_i64(data.into(), offset) } - pub fn add_i64(&mut self, data: i64, offset: i64) { - self.index_writer - .add_document(doc!( - self.field => data, - self.id_field => offset, - )) - .unwrap(); + pub fn add_i64(&mut self, data: i64, offset: i64) -> Result<()> { + let _ = self.index_writer.add_document(doc!( + self.field => data, + self.id_field => offset, + ))?; + Ok(()) } - pub fn add_f32(&mut self, data: f32, offset: i64) { + pub fn add_f32(&mut self, data: f32, offset: i64) -> Result<()> { self.add_f64(data.into(), offset) } - pub fn add_f64(&mut self, data: f64, offset: i64) { - self.index_writer - .add_document(doc!( - self.field => data, - self.id_field => offset, - )) - .unwrap(); + pub fn add_f64(&mut self, data: f64, offset: i64) -> Result<()> { + let _ = self.index_writer.add_document(doc!( + self.field => data, + self.id_field => offset, + ))?; + Ok(()) } - pub fn add_bool(&mut self, data: bool, offset: i64) { - self.index_writer - .add_document(doc!( - self.field => data, - self.id_field => offset, - )) - .unwrap(); + pub fn add_bool(&mut self, data: bool, offset: 
i64) -> Result<()> { + let _ = self.index_writer.add_document(doc!( + self.field => data, + self.id_field => offset, + ))?; + Ok(()) } - pub fn add_string(&mut self, data: &str, offset: i64) { - self.index_writer - .add_document(doc!( - self.field => data, - self.id_field => offset, - )) - .unwrap(); + pub fn add_string(&mut self, data: &str, offset: i64) -> Result<()> { + let _ = self.index_writer.add_document(doc!( + self.field => data, + self.id_field => offset, + ))?; + Ok(()) } - pub fn add_multi_i8s(&mut self, datas: &[i8], offset: i64) { + pub fn add_multi_i8s(&mut self, datas: &[i8], offset: i64) -> Result<()> { let mut document = Document::default(); for data in datas { document.add_field_value(self.field, *data as i64); } document.add_i64(self.id_field, offset); - self.index_writer.add_document(document).unwrap(); + let _ = self.index_writer.add_document(document)?; + Ok(()) } - pub fn add_multi_i16s(&mut self, datas: &[i16], offset: i64) { + pub fn add_multi_i16s(&mut self, datas: &[i16], offset: i64) -> Result<()> { let mut document = Document::default(); for data in datas { document.add_field_value(self.field, *data as i64); } document.add_i64(self.id_field, offset); - self.index_writer.add_document(document).unwrap(); + let _ = self.index_writer.add_document(document)?; + Ok(()) } - pub fn add_multi_i32s(&mut self, datas: &[i32], offset: i64) { + pub fn add_multi_i32s(&mut self, datas: &[i32], offset: i64) -> Result<()> { let mut document = Document::default(); for data in datas { document.add_field_value(self.field, *data as i64); } document.add_i64(self.id_field, offset); - self.index_writer.add_document(document).unwrap(); + let _ = self.index_writer.add_document(document)?; + Ok(()) } - pub fn add_multi_i64s(&mut self, datas: &[i64], offset: i64) { + pub fn add_multi_i64s(&mut self, datas: &[i64], offset: i64) -> Result<()> { let mut document = Document::default(); for data in datas { document.add_field_value(self.field, *data); } 
document.add_i64(self.id_field, offset); - self.index_writer.add_document(document).unwrap(); + let _ = self.index_writer.add_document(document)?; + Ok(()) } - pub fn add_multi_f32s(&mut self, datas: &[f32], offset: i64) { + pub fn add_multi_f32s(&mut self, datas: &[f32], offset: i64) -> Result<()> { let mut document = Document::default(); for data in datas { document.add_field_value(self.field, *data as f64); } document.add_i64(self.id_field, offset); - self.index_writer.add_document(document).unwrap(); + let _ = self.index_writer.add_document(document)?; + Ok(()) } - pub fn add_multi_f64s(&mut self, datas: &[f64], offset: i64) { + pub fn add_multi_f64s(&mut self, datas: &[f64], offset: i64) -> Result<()> { let mut document = Document::default(); for data in datas { document.add_field_value(self.field, *data); } document.add_i64(self.id_field, offset); - self.index_writer.add_document(document).unwrap(); + let _ = self.index_writer.add_document(document)?; + Ok(()) } - pub fn add_multi_bools(&mut self, datas: &[bool], offset: i64) { + pub fn add_multi_bools(&mut self, datas: &[bool], offset: i64) -> Result<()> { let mut document = Document::default(); for data in datas { document.add_field_value(self.field, *data); } document.add_i64(self.id_field, offset); - self.index_writer.add_document(document).unwrap(); + let _ = self.index_writer.add_document(document)?; + Ok(()) } - pub fn add_multi_keywords(&mut self, datas: &[*const c_char], offset: i64) { + pub fn add_multi_keywords(&mut self, datas: &[*const c_char], offset: i64) -> Result<()> { let mut document = Document::default(); for element in datas { let data = unsafe { CStr::from_ptr(*element) }; - document.add_field_value(self.field, data.to_str().unwrap()); + document.add_field_value(self.field, data.to_str()?); } document.add_i64(self.id_field, offset); - self.index_writer.add_document(document).unwrap(); + let _ = self.index_writer.add_document(document)?; + Ok(()) } - fn manual_merge(&mut self) { - let 
metas = self - .index_writer - .index() - .searchable_segment_metas() - .unwrap(); + fn manual_merge(&mut self) -> Result<()> { + let metas = self.index_writer.index().searchable_segment_metas()?; let policy = self.index_writer.get_merge_policy(); let candidates = policy.compute_merge_candidates(metas.as_slice()); for candidate in candidates { - self.index_writer - .merge(candidate.0.as_slice()) - .wait() - .unwrap(); + self.index_writer.merge(candidate.0.as_slice()).wait()?; } + Ok(()) } - pub fn finish(mut self) { - self.index_writer.commit().unwrap(); + pub fn finish(mut self) -> Result<()> { + self.index_writer.commit()?; // self.manual_merge(); - block_on(self.index_writer.garbage_collect_files()).unwrap(); - self.index_writer.wait_merging_threads().unwrap(); + block_on(self.index_writer.garbage_collect_files())?; + self.index_writer.wait_merging_threads()?; + Ok(()) } - pub(crate) fn commit(&mut self) { - self.index_writer.commit().unwrap(); + pub(crate) fn commit(&mut self) -> Result<()> { + self.index_writer.commit()?; + Ok(()) } } diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_c.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_c.rs index 9cb81d7129325..9f8a1f3a61498 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_c.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_c.rs @@ -1,8 +1,13 @@ use core::slice; use std::ffi::{c_char, c_void, CStr}; +use tantivy::Index; + use crate::{ + array::RustResult, + cstr_to_str, data_type::TantivyDataType, + error::Result, index_writer::IndexWriterWrapper, util::{create_binding, free_binding}, }; @@ -14,17 +19,19 @@ pub extern "C" fn tantivy_create_index( path: *const c_char, num_threads: usize, overall_memory_budget_in_bytes: usize, -) -> *mut c_void { - let field_name_str = unsafe { CStr::from_ptr(field_name) }; - let path_str = unsafe { CStr::from_ptr(path) }; - let wrapper = IndexWriterWrapper::new( - 
String::from(field_name_str.to_str().unwrap()), +) -> RustResult { + let field_name_str = cstr_to_str!(field_name); + let path_str = cstr_to_str!(path); + match IndexWriterWrapper::new( + String::from(field_name_str), data_type, - String::from(path_str.to_str().unwrap()), + String::from(path_str), num_threads, overall_memory_budget_in_bytes, - ); - create_binding(wrapper) + ) { + Ok(wrapper) => RustResult::from_ptr(create_binding(wrapper)), + Err(e) => RustResult::from_error(e.to_string()), + } } #[no_mangle] @@ -35,24 +42,25 @@ pub extern "C" fn tantivy_free_index_writer(ptr: *mut c_void) { // tantivy_finish_index will finish the index writer, and the index writer can't be used any more. // After this was called, you should reset the pointer to null. #[no_mangle] -pub extern "C" fn tantivy_finish_index(ptr: *mut c_void) { +pub extern "C" fn tantivy_finish_index(ptr: *mut c_void) -> RustResult { let real = ptr as *mut IndexWriterWrapper; - unsafe { Box::from_raw(real).finish() } + unsafe { Box::from_raw(real).finish().into() } } #[no_mangle] -pub extern "C" fn tantivy_commit_index(ptr: *mut c_void) { +pub extern "C" fn tantivy_commit_index(ptr: *mut c_void) -> RustResult { let real = ptr as *mut IndexWriterWrapper; - unsafe { - (*real).commit(); - } + unsafe { (*real).commit().into() } } #[no_mangle] -pub extern "C" fn tantivy_create_reader_from_writer(ptr: *mut c_void) -> *mut c_void { +pub extern "C" fn tantivy_create_reader_from_writer(ptr: *mut c_void) -> RustResult { let writer = ptr as *mut IndexWriterWrapper; let reader = unsafe { (*writer).create_reader() }; - create_binding(reader) + match reader { + Ok(r) => RustResult::from_ptr(create_binding(r)), + Err(e) => RustResult::from_error(e.to_string()), + } } // -------------------------build-------------------- @@ -62,14 +70,10 @@ pub extern "C" fn tantivy_index_add_int8s( array: *const i8, len: usize, offset_begin: i64, -) { +) -> RustResult { let real = ptr as *mut IndexWriterWrapper; let arr = unsafe { 
slice::from_raw_parts(array, len) }; - unsafe { - for (index, data) in arr.iter().enumerate() { - (*real).add_i8(*data, offset_begin + (index as i64)); - } - } + unsafe { execute(arr, offset_begin, IndexWriterWrapper::add_i8, &mut (*real)).into() } } #[no_mangle] @@ -78,14 +82,10 @@ pub extern "C" fn tantivy_index_add_int16s( array: *const i16, len: usize, offset_begin: i64, -) { +) -> RustResult { let real = ptr as *mut IndexWriterWrapper; let arr = unsafe { slice::from_raw_parts(array, len) }; - unsafe { - for (index, data) in arr.iter().enumerate() { - (*real).add_i16(*data, offset_begin + (index as i64)); - } - } + unsafe { execute(arr, offset_begin, IndexWriterWrapper::add_i16, &mut (*real)).into() } } #[no_mangle] @@ -94,14 +94,10 @@ pub extern "C" fn tantivy_index_add_int32s( array: *const i32, len: usize, offset_begin: i64, -) { +) -> RustResult { let real = ptr as *mut IndexWriterWrapper; let arr = unsafe { slice::from_raw_parts(array, len) }; - unsafe { - for (index, data) in arr.iter().enumerate() { - (*real).add_i32(*data, offset_begin + (index as i64)); - } - } + unsafe { execute(arr, offset_begin, IndexWriterWrapper::add_i32, &mut (*real)).into() } } #[no_mangle] @@ -110,14 +106,25 @@ pub extern "C" fn tantivy_index_add_int64s( array: *const i64, len: usize, offset_begin: i64, -) { +) -> RustResult { let real = ptr as *mut IndexWriterWrapper; let arr = unsafe { slice::from_raw_parts(array, len) }; + + unsafe { execute(arr, offset_begin, IndexWriterWrapper::add_i64, &mut (*real)).into() } +} + +fn execute( + arr: &[T], + offset: i64, + mut e: fn(&mut IndexWriterWrapper, T, i64) -> Result<()>, + w: &mut IndexWriterWrapper, +) -> Result<()> { unsafe { for (index, data) in arr.iter().enumerate() { - (*real).add_i64(*data, offset_begin + (index as i64)); + e(w, *data, offset + (index as i64))?; } } + Ok(()) } #[no_mangle] @@ -126,14 +133,10 @@ pub extern "C" fn tantivy_index_add_f32s( array: *const f32, len: usize, offset_begin: i64, -) { +) -> RustResult 
{ let real = ptr as *mut IndexWriterWrapper; let arr = unsafe { slice::from_raw_parts(array, len) }; - unsafe { - for (index, data) in arr.iter().enumerate() { - (*real).add_f32(*data, offset_begin + (index as i64)); - } - } + unsafe { execute(arr, offset_begin, IndexWriterWrapper::add_f32, &mut (*real)).into() } } #[no_mangle] @@ -142,14 +145,10 @@ pub extern "C" fn tantivy_index_add_f64s( array: *const f64, len: usize, offset_begin: i64, -) { +) -> RustResult { let real = ptr as *mut IndexWriterWrapper; let arr = unsafe { slice::from_raw_parts(array, len) }; - unsafe { - for (index, data) in arr.iter().enumerate() { - (*real).add_f64(*data, offset_begin + (index as i64)); - } - } + unsafe { execute(arr, offset_begin, IndexWriterWrapper::add_f64, &mut (*real)).into() } } #[no_mangle] @@ -158,23 +157,31 @@ pub extern "C" fn tantivy_index_add_bools( array: *const bool, len: usize, offset_begin: i64, -) { +) -> RustResult { let real = ptr as *mut IndexWriterWrapper; let arr = unsafe { slice::from_raw_parts(array, len) }; unsafe { - for (index, data) in arr.iter().enumerate() { - (*real).add_bool(*data, offset_begin + (index as i64)); - } + execute( + arr, + offset_begin, + IndexWriterWrapper::add_bool, + &mut (*real), + ) + .into() } } // TODO: this is not a very efficient way, since we must call this function many times, which // will bring a lot of overhead caused by the rust binding. 
#[no_mangle] -pub extern "C" fn tantivy_index_add_string(ptr: *mut c_void, s: *const c_char, offset: i64) { +pub extern "C" fn tantivy_index_add_string( + ptr: *mut c_void, + s: *const c_char, + offset: i64, +) -> RustResult { let real = ptr as *mut IndexWriterWrapper; - let c_str = unsafe { CStr::from_ptr(s) }; - unsafe { (*real).add_string(c_str.to_str().unwrap(), offset) } + let s = cstr_to_str!(s); + unsafe { (*real).add_string(s, offset).into() } } // --------------------------------------------- array ------------------------------------------ @@ -185,11 +192,11 @@ pub extern "C" fn tantivy_index_add_multi_int8s( array: *const i8, len: usize, offset: i64, -) { +) -> RustResult { let real = ptr as *mut IndexWriterWrapper; unsafe { let arr = slice::from_raw_parts(array, len); - (*real).add_multi_i8s(arr, offset) + (*real).add_multi_i8s(arr, offset).into() } } @@ -199,11 +206,11 @@ pub extern "C" fn tantivy_index_add_multi_int16s( array: *const i16, len: usize, offset: i64, -) { +) -> RustResult { let real = ptr as *mut IndexWriterWrapper; unsafe { let arr = slice::from_raw_parts(array, len); - (*real).add_multi_i16s(arr, offset); + (*real).add_multi_i16s(arr, offset).into() } } @@ -213,11 +220,11 @@ pub extern "C" fn tantivy_index_add_multi_int32s( array: *const i32, len: usize, offset: i64, -) { +) -> RustResult { let real = ptr as *mut IndexWriterWrapper; unsafe { let arr = slice::from_raw_parts(array, len); - (*real).add_multi_i32s(arr, offset); + (*real).add_multi_i32s(arr, offset).into() } } @@ -227,11 +234,11 @@ pub extern "C" fn tantivy_index_add_multi_int64s( array: *const i64, len: usize, offset: i64, -) { +) -> RustResult { let real = ptr as *mut IndexWriterWrapper; unsafe { let arr = slice::from_raw_parts(array, len); - (*real).add_multi_i64s(arr, offset); + (*real).add_multi_i64s(arr, offset).into() } } @@ -241,11 +248,11 @@ pub extern "C" fn tantivy_index_add_multi_f32s( array: *const f32, len: usize, offset: i64, -) { +) -> RustResult { let real = 
ptr as *mut IndexWriterWrapper; unsafe { let arr = slice::from_raw_parts(array, len); - (*real).add_multi_f32s(arr, offset); + (*real).add_multi_f32s(arr, offset).into() } } @@ -255,11 +262,11 @@ pub extern "C" fn tantivy_index_add_multi_f64s( array: *const f64, len: usize, offset: i64, -) { +) -> RustResult { let real = ptr as *mut IndexWriterWrapper; unsafe { let arr = slice::from_raw_parts(array, len); - (*real).add_multi_f64s(arr, offset); + (*real).add_multi_f64s(arr, offset).into() } } @@ -269,11 +276,11 @@ pub extern "C" fn tantivy_index_add_multi_bools( array: *const bool, len: usize, offset: i64, -) { +) -> RustResult { let real = ptr as *mut IndexWriterWrapper; unsafe { let arr = slice::from_raw_parts(array, len); - (*real).add_multi_bools(arr, offset); + (*real).add_multi_bools(arr, offset).into() } } @@ -283,10 +290,10 @@ pub extern "C" fn tantivy_index_add_multi_keywords( array: *const *const c_char, len: usize, offset: i64, -) { +) -> RustResult { let real = ptr as *mut IndexWriterWrapper; unsafe { let arr = slice::from_raw_parts(array, len); - (*real).add_multi_keywords(arr, offset) + (*real).add_multi_keywords(arr, offset).into() } } diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_text_c.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_text_c.rs index b94d04506db94..fdc7fbfdab963 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_text_c.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_text_c.rs @@ -2,11 +2,14 @@ use std::ffi::c_char; use std::ffi::c_void; use std::ffi::CStr; +use crate::array::RustResult; +use crate::cstr_to_str; +use crate::error::Result; use crate::index_writer::IndexWriterWrapper; +use crate::log::init_log; +use crate::string_c::c_str_to_str; use crate::tokenizer::create_tokenizer; use crate::util::create_binding; -use crate::string_c::c_str_to_str; -use crate::log::init_log; #[no_mangle] pub extern "C" fn 
tantivy_create_text_writer( @@ -17,13 +20,13 @@ pub extern "C" fn tantivy_create_text_writer( num_threads: usize, overall_memory_budget_in_bytes: usize, in_ram: bool, -) -> *mut c_void { +) -> RustResult { init_log(); - let field_name_str = unsafe { CStr::from_ptr(field_name).to_str().unwrap() }; - let path_str = unsafe { CStr::from_ptr(path).to_str().unwrap() }; - let tokenizer_name_str = unsafe { CStr::from_ptr(tokenizer_name).to_str().unwrap() }; - let params = unsafe{c_str_to_str(analyzer_params).to_string()}; - let analyzer = create_tokenizer(&params); + let field_name_str = cstr_to_str!(field_name); + let path_str = cstr_to_str!(path); + let tokenizer_name_str = cstr_to_str!(tokenizer_name); + let params = cstr_to_str!(analyzer_params); + let analyzer = create_tokenizer(params); match analyzer { Ok(text_analyzer) => { let wrapper = IndexWriterWrapper::create_text_writer( @@ -35,11 +38,12 @@ pub extern "C" fn tantivy_create_text_writer( overall_memory_budget_in_bytes, in_ram, ); - create_binding(wrapper) + RustResult::from_ptr(create_binding(wrapper)) } - Err(err) => { - log::warn!("create tokenizer failed with error: {} param: {}", err.to_string(), params); - std::ptr::null_mut() - }, + Err(err) => RustResult::from_error(format!( + "create tokenizer failed with error: {} param: {}", + err.to_string(), + params, + )), } } diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/tokenizer.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/tokenizer.rs index b910774a2e164..06c440f39bc81 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/tokenizer.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/tokenizer.rs @@ -280,7 +280,7 @@ pub(crate) fn create_tokenizer_with_filter(params: &String) -> Result Result { +pub(crate) fn create_tokenizer(params: &str) -> Result { if params.len() == 0 { return Ok(standard_analyzer(vec![])); } diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/tokenizer_c.rs 
b/internal/core/thirdparty/tantivy/tantivy-binding/src/tokenizer_c.rs index 1cac3328a5848..6290591d27526 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/tokenizer_c.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/tokenizer_c.rs @@ -1,31 +1,33 @@ -use libc::{c_void,c_char}; +use libc::{c_char, c_void}; use tantivy::tokenizer::TextAnalyzer; use crate::{ + array::RustResult, + log::init_log, string_c::c_str_to_str, tokenizer::create_tokenizer, util::{create_binding, free_binding}, - log::init_log, }; #[no_mangle] -pub extern "C" fn tantivy_create_tokenizer(analyzer_params: *const c_char) -> *mut c_void { +pub extern "C" fn tantivy_create_tokenizer(analyzer_params: *const c_char) -> RustResult { init_log(); - let params = unsafe{c_str_to_str(analyzer_params).to_string()}; + let params = unsafe { c_str_to_str(analyzer_params).to_string() }; let analyzer = create_tokenizer(&params); match analyzer { - Ok(text_analyzer) => create_binding(text_analyzer), - Err(err) => { - log::warn!("create tokenizer failed with error: {} param: {}", err.to_string(), params); - std::ptr::null_mut() - }, + Ok(text_analyzer) => RustResult::from_ptr(create_binding(text_analyzer)), + Err(err) => RustResult::from_error(format!( + "create tokenizer failed with error: {} param: {}", + err.to_string(), + params, + )), } } #[no_mangle] pub extern "C" fn tantivy_clone_tokenizer(ptr: *mut c_void) -> *mut c_void { - let analyzer=ptr as *mut TextAnalyzer; - let clone = unsafe {(*analyzer).clone()}; + let analyzer = ptr as *mut TextAnalyzer; + let clone = unsafe { (*analyzer).clone() }; create_binding(clone) } diff --git a/internal/core/thirdparty/tantivy/tantivy-wrapper.h b/internal/core/thirdparty/tantivy/tantivy-wrapper.h index 09edc0d92337f..b6e956b47d5a2 100644 --- a/internal/core/thirdparty/tantivy/tantivy-wrapper.h +++ b/internal/core/thirdparty/tantivy/tantivy-wrapper.h @@ -4,7 +4,9 @@ #include #include #include +#include +#include "common/EasyAssert.h" #include 
"tantivy-binding.h" #include "rust-binding.h" #include "rust-array.h" @@ -82,18 +84,27 @@ struct TantivyIndexWrapper { uintptr_t num_threads = DEFAULT_NUM_THREADS, uintptr_t overall_memory_budget_in_bytes = DEFAULT_OVERALL_MEMORY_BUDGET_IN_BYTES) { - writer_ = tantivy_create_index(field_name, - data_type, - path, - num_threads, - overall_memory_budget_in_bytes); + auto res = RustResultWrapper( + tantivy_create_index(field_name, + data_type, + path, + num_threads, + overall_memory_budget_in_bytes)); + AssertInfo(res.result_->success, + "failed to create index: {}", + res.result_->error); + writer_ = res.result_->value.ptr._0; path_ = std::string(path); } // load index. create index reader. explicit TantivyIndexWrapper(const char* path) { assert(tantivy_index_exist(path)); - reader_ = tantivy_load_index(path); + auto res = RustResultWrapper(tantivy_load_index(path)); + AssertInfo(res.result_->success, + "failed to load index: {}", + res.result_->error); + reader_ = res.result_->value.ptr._0; path_ = std::string(path); } @@ -106,13 +117,18 @@ struct TantivyIndexWrapper { uintptr_t num_threads = DEFAULT_NUM_THREADS, uintptr_t overall_memory_budget_in_bytes = DEFAULT_OVERALL_MEMORY_BUDGET_IN_BYTES) { - writer_ = tantivy_create_text_writer(field_name, - path, - tokenizer_name, - analyzer_params, - num_threads, - overall_memory_budget_in_bytes, - in_ram); + auto res = RustResultWrapper( + tantivy_create_text_writer(field_name, + path, + tokenizer_name, + analyzer_params, + num_threads, + overall_memory_budget_in_bytes, + in_ram)); + AssertInfo(res.result_->success, + "failed to create text writer: {}", + res.result_->error); + writer_ = res.result_->value.ptr._0; path_ = std::string(path); } @@ -120,10 +136,19 @@ struct TantivyIndexWrapper { void create_reader() { if (writer_ != nullptr) { - reader_ = tantivy_create_reader_from_writer(writer_); + auto res = + RustResultWrapper(tantivy_create_reader_from_writer(writer_)); + AssertInfo(res.result_->success, + "failed to 
create reader from writer: {}", + res.result_->error); + reader_ = res.result_->value.ptr._0; } else if (!path_.empty()) { assert(tantivy_index_exist(path_.c_str())); - reader_ = tantivy_load_index(path_.c_str()); + auto res = RustResultWrapper(tantivy_load_index(path_.c_str())); + AssertInfo(res.result_->success, + "failed to load index: {}", + res.result_->error); + reader_ = res.result_->value.ptr._0; } } @@ -135,8 +160,11 @@ struct TantivyIndexWrapper { register_tokenizer(const char* tokenizer_name, const char* analyzer_params) { if (reader_ != nullptr) { - tantivy_register_tokenizer( - reader_, tokenizer_name, analyzer_params); + auto res = RustResultWrapper(tantivy_register_tokenizer( + reader_, tokenizer_name, analyzer_params)); + AssertInfo(res.result_->success, + "failed to register tokenizer: {}", + res.result_->error); } } @@ -146,47 +174,78 @@ struct TantivyIndexWrapper { assert(!finished_); if constexpr (std::is_same_v) { - tantivy_index_add_bools(writer_, array, len, offset_begin); + auto res = RustResultWrapper( + tantivy_index_add_bools(writer_, array, len, offset_begin)); + AssertInfo(res.result_->success, + "failed to add bools: {}", + res.result_->error); return; } if constexpr (std::is_same_v) { - tantivy_index_add_int8s(writer_, array, len, offset_begin); + auto res = RustResultWrapper( + tantivy_index_add_int8s(writer_, array, len, offset_begin)); + AssertInfo(res.result_->success, + "failed to add int8s: {}", + res.result_->error); return; } if constexpr (std::is_same_v) { - tantivy_index_add_int16s(writer_, array, len, offset_begin); + auto res = RustResultWrapper( + tantivy_index_add_int16s(writer_, array, len, offset_begin)); + AssertInfo(res.result_->success, + "failed to add int16s: {}", + res.result_->error); return; } if constexpr (std::is_same_v) { - tantivy_index_add_int32s(writer_, array, len, offset_begin); + auto res = RustResultWrapper( + tantivy_index_add_int32s(writer_, array, len, offset_begin)); + 
AssertInfo(res.result_->success, + "failed to add int32s: {}", + res.result_->error); return; } if constexpr (std::is_same_v) { - tantivy_index_add_int64s(writer_, array, len, offset_begin); + auto res = RustResultWrapper( + tantivy_index_add_int64s(writer_, array, len, offset_begin)); + AssertInfo(res.result_->success, + "failed to add int64s: {}", + res.result_->error); return; } if constexpr (std::is_same_v) { - tantivy_index_add_f32s(writer_, array, len, offset_begin); + auto res = RustResultWrapper( + tantivy_index_add_f32s(writer_, array, len, offset_begin)); + AssertInfo(res.result_->success, + "failed to add f32s: {}", + res.result_->error); return; } if constexpr (std::is_same_v) { - tantivy_index_add_f64s(writer_, array, len, offset_begin); + auto res = RustResultWrapper( + tantivy_index_add_f64s(writer_, array, len, offset_begin)); + AssertInfo(res.result_->success, + "failed to add f64s: {}", + res.result_->error); return; } if constexpr (std::is_same_v) { // TODO: not very efficient, a lot of overhead due to rust-ffi call. 
for (uintptr_t i = 0; i < len; i++) { - tantivy_index_add_string( + auto res = RustResultWrapper(tantivy_index_add_string( writer_, static_cast(array)[i].c_str(), - offset_begin + i); + offset_begin + i)); + AssertInfo(res.result_->success, + "failed to add string: {}", + res.result_->error); } return; } @@ -201,37 +260,65 @@ struct TantivyIndexWrapper { assert(!finished_); if constexpr (std::is_same_v) { - tantivy_index_add_multi_bools(writer_, array, len, offset); + auto res = RustResultWrapper( + tantivy_index_add_multi_bools(writer_, array, len, offset)); + AssertInfo(res.result_->success, + "failed to add multi bools: {}", + res.result_->error); return; } if constexpr (std::is_same_v) { - tantivy_index_add_multi_int8s(writer_, array, len, offset); + auto res = RustResultWrapper( + tantivy_index_add_multi_int8s(writer_, array, len, offset)); + AssertInfo(res.result_->success, + "failed to add multi int8s: {}", + res.result_->error); return; } if constexpr (std::is_same_v) { - tantivy_index_add_multi_int16s(writer_, array, len, offset); + auto res = RustResultWrapper( + tantivy_index_add_multi_int16s(writer_, array, len, offset)); + AssertInfo(res.result_->success, + "failed to add multi int16s: {}", + res.result_->error); return; } if constexpr (std::is_same_v) { - tantivy_index_add_multi_int32s(writer_, array, len, offset); + auto res = RustResultWrapper( + tantivy_index_add_multi_int32s(writer_, array, len, offset)); + AssertInfo(res.result_->success, + "failed to add multi int32s: {}", + res.result_->error); return; } if constexpr (std::is_same_v) { - tantivy_index_add_multi_int64s(writer_, array, len, offset); + auto res = RustResultWrapper( + tantivy_index_add_multi_int64s(writer_, array, len, offset)); + AssertInfo(res.result_->success, + "failed to add multi int64s: {}", + res.result_->error); return; } if constexpr (std::is_same_v) { - tantivy_index_add_multi_f32s(writer_, array, len, offset); + auto res = RustResultWrapper( + 
tantivy_index_add_multi_f32s(writer_, array, len, offset)); + AssertInfo(res.result_->success, + "failed to add multi f32s: {}", + res.result_->error); return; } if constexpr (std::is_same_v) { - tantivy_index_add_multi_f64s(writer_, array, len, offset); + auto res = RustResultWrapper( + tantivy_index_add_multi_f64s(writer_, array, len, offset)); + AssertInfo(res.result_->success, + "failed to add multi f64s: {}", + res.result_->error); return; } @@ -240,8 +327,11 @@ struct TantivyIndexWrapper { for (uintptr_t i = 0; i < len; i++) { views.push_back(array[i].c_str()); } - tantivy_index_add_multi_keywords( - writer_, views.data(), len, offset); + auto res = RustResultWrapper(tantivy_index_add_multi_keywords( + writer_, views.data(), len, offset)); + AssertInfo(res.result_->success, + "failed to add multi keywords: {}", + res.result_->error); return; } @@ -256,7 +346,10 @@ struct TantivyIndexWrapper { return; } - tantivy_finish_index(writer_); + auto res = RustResultWrapper(tantivy_finish_index(writer_)); + AssertInfo(res.result_->success, + "failed to finish index: {}", + res.result_->error); writer_ = nullptr; finished_ = true; } @@ -264,20 +357,30 @@ struct TantivyIndexWrapper { inline void commit() { if (writer_ != nullptr) { - tantivy_commit_index(writer_); + auto res = RustResultWrapper(tantivy_commit_index(writer_)); + AssertInfo(res.result_->success, + "failed to commit index: {}", + res.result_->error); } } inline void reload() { if (reader_ != nullptr) { - tantivy_reload_index(reader_); + auto res = RustResultWrapper(tantivy_reload_index(reader_)); + AssertInfo(res.result_->success, + "failed to reload index: {}", + res.result_->error); } } inline uint32_t count() { - return tantivy_index_count(reader_); + auto res = RustResultWrapper(tantivy_index_count(reader_)); + AssertInfo(res.result_->success, + "failed to get count: {}", + res.result_->error); + return res.result_->value.u32._0; } public: @@ -308,7 +411,14 @@ struct TantivyIndexWrapper { 
"InvertedIndex.term_query: unsupported data type: {}", typeid(T).name()); }(); - return RustArrayWrapper(array); + + auto res = RustResultWrapper(array); + AssertInfo(res.result_->success, + "TantivyIndexWrapper.term_query: {}", + res.result_->error); + AssertInfo(res.result_->value.tag == Value::Tag::RustArray, + "TantivyIndexWrapper.term_query: invalid result type"); + return RustArrayWrapper(std::move(res.result_->value.rust_array._0)); } template @@ -337,7 +447,15 @@ struct TantivyIndexWrapper { "{}", typeid(T).name()); }(); - return RustArrayWrapper(array); + auto res = RustResultWrapper(array); + AssertInfo(res.result_->success, + "TantivyIndexWrapper.lower_bound_range_query: {}", + res.result_->error); + AssertInfo( + res.result_->value.tag == Value::Tag::RustArray, + "TantivyIndexWrapper.lower_bound_range_query: invalid result " + "type"); + return RustArrayWrapper(std::move(res.result_->value.rust_array._0)); } template @@ -366,7 +484,15 @@ struct TantivyIndexWrapper { "{}", typeid(T).name()); }(); - return RustArrayWrapper(array); + auto res = RustResultWrapper(array); + AssertInfo(res.result_->success, + "TantivyIndexWrapper.upper_bound_range_query: {}", + res.result_->error); + AssertInfo( + res.result_->value.tag == Value::Tag::RustArray, + "TantivyIndexWrapper.upper_bound_range_query: invalid result " + "type"); + return RustArrayWrapper(std::move(res.result_->value.rust_array._0)); } template @@ -406,25 +532,49 @@ struct TantivyIndexWrapper { "InvertedIndex.range_query: unsupported data type: {}", typeid(T).name()); }(); - return RustArrayWrapper(array); + auto res = RustResultWrapper(array); + AssertInfo(res.result_->success, + "TantivyIndexWrapper.range_query: {}", + res.result_->error); + AssertInfo(res.result_->value.tag == Value::Tag::RustArray, + "TantivyIndexWrapper.range_query: invalid result type"); + return RustArrayWrapper(std::move(res.result_->value.rust_array._0)); } RustArrayWrapper prefix_query(const std::string& prefix) { auto array 
= tantivy_prefix_query_keyword(reader_, prefix.c_str()); - return RustArrayWrapper(array); + auto res = RustResultWrapper(array); + AssertInfo(res.result_->success, + "TantivyIndexWrapper.prefix_query: {}", + res.result_->error); + AssertInfo(res.result_->value.tag == Value::Tag::RustArray, + "TantivyIndexWrapper.prefix_query: invalid result type"); + return RustArrayWrapper(std::move(res.result_->value.rust_array._0)); } RustArrayWrapper regex_query(const std::string& pattern) { auto array = tantivy_regex_query(reader_, pattern.c_str()); - return RustArrayWrapper(array); + auto res = RustResultWrapper(array); + AssertInfo(res.result_->success, + "TantivyIndexWrapper.regex_query: {}", + res.result_->error); + AssertInfo(res.result_->value.tag == Value::Tag::RustArray, + "TantivyIndexWrapper.regex_query: invalid result type"); + return RustArrayWrapper(std::move(res.result_->value.rust_array._0)); } RustArrayWrapper match_query(const std::string& query) { auto array = tantivy_match_query(reader_, query.c_str()); - return RustArrayWrapper(array); + auto res = RustResultWrapper(array); + AssertInfo(res.result_->success, + "TantivyIndexWrapper.match_query: {}", + res.result_->error); + AssertInfo(res.result_->value.tag == Value::Tag::RustArray, + "TantivyIndexWrapper.match_query: invalid result type"); + return RustArrayWrapper(std::move(res.result_->value.rust_array._0)); } public: diff --git a/internal/core/thirdparty/tantivy/tokenizer.h b/internal/core/thirdparty/tantivy/tokenizer.h index eeeec4db6de3e..c4be0ee314284 100644 --- a/internal/core/thirdparty/tantivy/tokenizer.h +++ b/internal/core/thirdparty/tantivy/tokenizer.h @@ -3,6 +3,7 @@ #include "tantivy-binding.h" #include "rust-binding.h" #include "rust-hashmap.h" +#include "tantivy/rust-array.h" #include "token-stream.h" namespace milvus::tantivy { @@ -13,10 +14,12 @@ struct Tokenizer { explicit Tokenizer(std::string&& params) { auto shared_params = std::make_shared(std::move(params)); - ptr_ = 
tantivy_create_tokenizer(shared_params->c_str()); - if (ptr_ == nullptr) { - throw std::invalid_argument("invalid tokenizer parameters"); - } + auto res = + RustResultWrapper(tantivy_create_tokenizer(shared_params->c_str())); + AssertInfo(res.result_->success, + "Tokenizer creation failed: {}", + res.result_->error); + ptr_ = res.result_->value.ptr._0; } explicit Tokenizer(void* _ptr) : ptr_(_ptr) { diff --git a/internal/core/unittest/CMakeLists.txt b/internal/core/unittest/CMakeLists.txt index 72cf5fe64966e..53a3b8018dbe4 100644 --- a/internal/core/unittest/CMakeLists.txt +++ b/internal/core/unittest/CMakeLists.txt @@ -88,6 +88,7 @@ set(MILVUS_TEST_FILES test_utils.cpp test_chunked_segment.cpp test_chunked_column.cpp + test_rust_result.cpp ) if ( INDEX_ENGINE STREQUAL "cardinal" ) diff --git a/internal/core/unittest/test_rust_result.cpp b/internal/core/unittest/test_rust_result.cpp new file mode 100644 index 0000000000000..a835476dd3ec8 --- /dev/null +++ b/internal/core/unittest/test_rust_result.cpp @@ -0,0 +1,27 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. 
See the License for the specific language governing permissions and limitations under the License + +#include +#include "gtest/gtest.h" +#include "tantivy-binding.h" +TEST(RustResultTest, TestResult) { + auto arr = test_enum_with_array(); + auto len = arr.value.rust_array._0.len; + for (size_t i = 0; i < len; i++) { + EXPECT_EQ(i + 1, arr.value.rust_array._0.array[i]); + } + free_rust_result(arr); + + auto ptr = test_enum_with_ptr(); + EXPECT_EQ(1, *static_cast(ptr.value.ptr._0)); + free_rust_result(ptr); + free_test_ptr(ptr.value.ptr._0); +} \ No newline at end of file diff --git a/tests/python_client/testcases/test_query.py b/tests/python_client/testcases/test_query.py index 0ce63bb44ccbb..2e5b3c69d7a98 100644 --- a/tests/python_client/testcases/test_query.py +++ b/tests/python_client/testcases/test_query.py @@ -6679,7 +6679,7 @@ def test_query_text_match_with_unsupported_tokenizer(self): default_schema = CollectionSchema( fields=default_fields, description="test collection" ) - error = {ct.err_code: 2000, ct.err_msg: "invalid tokenizer parameters"} + error = {ct.err_code: 2000, ct.err_msg: "unsupported tokenizer"} self.init_collection_wrap( name=cf.gen_unique_str(prefix), schema=default_schema,