Skip to content

Commit

Permalink
Nom query parser (#184)
Browse files Browse the repository at this point in the history
* model that `inbody:...`, `intitle:...`, etc. can take either a simple term or a phrase query as a subterm

* rewrite the query parser using nom

* all-whitespace queries should return an empty terms vec
  • Loading branch information
mikkeldenker authored Mar 19, 2024
1 parent 37b6c7d commit e47b49a
Show file tree
Hide file tree
Showing 11 changed files with 821 additions and 605 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ memmap = "0.7.0"
memmap2 = "0.9.0"
mime = "0.3.17"
min-max-heap = "1.3.0"
nom = "7.1.3"
num_cpus = "1.15.0"
once_cell = "1.13.1"
proptest = "1.2.0"
Expand Down
1 change: 1 addition & 0 deletions crates/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ memmap = { workspace = true }
memmap2 = { workspace = true }
mime = { workspace = true }
min-max-heap = { workspace = true }
nom = { workspace = true }
num_cpus = { workspace = true }
once_cell = { workspace = true }
openraft = { workspace = true }
Expand Down
14 changes: 7 additions & 7 deletions crates/core/src/bangs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,9 @@ impl Bangs {
}
}

pub fn get(&self, terms: &[Box<Term>]) -> Option<BangHit> {
pub fn get(&self, terms: &[Term]) -> Option<BangHit> {
for possible_bang in terms.iter().filter_map(|term| {
if let Term::PossibleBang(possible_bang) = term.as_ref() {
if let Term::PossibleBang(possible_bang) = term {
Some(possible_bang)
} else {
None
Expand All @@ -122,13 +122,13 @@ impl Bangs {
terms
.iter()
.filter(|term| {
if let Term::PossibleBang(bang) = term.as_ref() {
if let Term::PossibleBang(bang) = term {
bang != possible_bang
} else {
true
}
})
.map(|term| term.as_ref().to_string()),
.map(|term| term.to_string()),
" ".to_string(),
)
.collect::<String>();
Expand Down Expand Up @@ -172,11 +172,11 @@ mod tests {
}]"#,
);

assert_eq!(bangs.get(&parse("no bangs")), None);
assert_eq!(bangs.get(&parse("!no bangs")), None);
assert_eq!(bangs.get(&parse("no bangs").unwrap()), None);
assert_eq!(bangs.get(&parse("!no bangs").unwrap()), None);

assert_eq!(
bangs.get(&parse("!ty bangs")),
bangs.get(&parse("!ty bangs").unwrap()),
Some(BangHit {
bang: Bang {
category: Some("Multimedia".to_string()),
Expand Down
18 changes: 11 additions & 7 deletions crates/core/src/query/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,16 @@ pub mod union;

use parser::Term;

use self::{optic::AsMultipleTantivyQuery, parser::CompoundAwareTerm};
use self::{
optic::AsMultipleTantivyQuery,
parser::{CompoundAwareTerm, SimpleOrPhrase},
};

const MAX_SIMILAR_TERMS: usize = 10;

#[derive(Clone, Debug)]
pub struct Query {
#[allow(clippy::vec_box)]
terms: Vec<Box<Term>>,
terms: Vec<Term>,
simple_terms_text: Vec<String>,
tantivy_query: Box<BooleanQuery>,
host_rankings: HostRankings,
Expand All @@ -58,7 +60,7 @@ pub struct Query {

impl Query {
pub fn parse(ctx: &Ctx, query: &SearchQuery, index: &InvertedIndex) -> Result<Query> {
let parsed_terms = parser::parse(&query.query);
let parsed_terms = parser::parse(&query.query)?;
let mut term_count = HashMap::new();
let mut terms = Vec::new();

Expand All @@ -76,7 +78,7 @@ impl Query {
.clone()
.into_iter()
.map(|term| CompoundAwareTerm {
term: *term,
term,
adjacent_terms: Vec::new(),
})
.collect();
Expand All @@ -87,7 +89,9 @@ impl Query {
for window in term_ids.windows(window_size) {
let mut window_terms = Vec::new();
for i in window {
if let Term::Simple(t) = &compound_terms[*i].term {
if let Term::SimpleOrPhrase(SimpleOrPhrase::Simple(t)) =
&compound_terms[*i].term
{
window_terms.push(t.clone());
}
}
Expand Down Expand Up @@ -180,7 +184,7 @@ impl Query {
&self.simple_terms_text
}

pub fn terms(&self) -> &[Box<Term>] {
pub fn terms(&self) -> &[Term] {
&self.terms
}

Expand Down
Loading

0 comments on commit e47b49a

Please sign in to comment.