fix: empty analyzer params do not use the standard tokenizer (#38148)
Relates to: #35853

Signed-off-by: aoiasd <[email protected]>
aoiasd authored Dec 4, 2024
1 parent 1f66b9e · commit 87aa9a0
Showing 1 changed file with 13 additions and 6 deletions.
internal/core/thirdparty/tantivy/tantivy-binding/src/tokenizer.rs (13 additions, 6 deletions)

@@ -87,10 +87,12 @@ impl AnalyzerBuilder<'_> {
         }
     }

-    fn get_tokenizer_name(&self) -> Result<String> {
-        let tokenizer = self.params.get("tokenizer");
-        if tokenizer.is_none() {
-            return Ok("standard".to_string());
+    fn get_tokenizer_name(&self) -> Result<String>{
+        let tokenizer=self.params.get("tokenizer");
+        if tokenizer.is_none(){
+            return Err(TantivyBindingError::InternalError(format!(
+                "tokenizer name or type must be set"
+            )));
         }
         if !tokenizer.unwrap().is_string() {
             return Err(TantivyBindingError::InternalError(format!(
@@ -257,8 +259,14 @@ pub(crate) fn create_tokenizer_with_filter(params: &String) -> Result<TextAnalyzer> {
             "analyzer params should be a json map".to_string(),
         ));
     }
-    let mut builder = AnalyzerBuilder::new(analyzer_params.unwrap().as_object().unwrap());
+
+    let builder_params = analyzer_params.unwrap().as_object().unwrap();
+    if builder_params.is_empty(){
+        return Ok(standard_analyzer(vec![]));
+    }
+
+    let mut builder = AnalyzerBuilder::new(builder_params);

     // build custom filter
     let filter_params = json_params.get("filter");
     if !filter_params.is_none() && filter_params.unwrap().is_object() {
@@ -282,7 +290,6 @@ pub(crate) fn create_tokenizer(params: &String) -> Result<TextAnalyzer> {
 #[cfg(test)]
 mod tests {
     use crate::tokenizer::create_tokenizer;
-    use regex;

     #[test]
     fn test_standard_analyzer() {
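With this change, an empty analyzer params map falls back to the standard analyzer, while a non-empty map that omits a tokenizer name or type is rejected with an error. For illustration only (not part of this commit), here is a minimal test sketch of that behavior; it assumes an empty JSON object passed to create_tokenizer reaches the new builder_params.is_empty() branch in create_tokenizer_with_filter, and the module and test names are hypothetical:

#[cfg(test)]
mod empty_params_sketch {
    use crate::tokenizer::create_tokenizer;

    #[test]
    fn test_empty_params_fall_back_to_standard_analyzer() {
        // Assumption: "{}" parses to an empty analyzer params map, so the new
        // branch returns Ok(standard_analyzer(vec![])) instead of failing in
        // get_tokenizer_name with "tokenizer name or type must be set".
        let params = "{}".to_string();
        assert!(create_tokenizer(&params).is_ok());
    }
}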
