From 3239e21d951ceeacb31274db4ce448e48863ae08 Mon Sep 17 00:00:00 2001 From: Mikkel Denker Date: Thu, 29 Feb 2024 09:45:05 +0100 Subject: [PATCH] parse site block rules into 'HostRankings' instead. they are still executed as exactly the same tantivy queries, but this allows us to correctly import the sites from exported optics. --- crates/core/src/query/optic.rs | 14 ++++++++++++++ crates/optics/src/lib.rs | 29 +++++++++++++++++++++++++++-- 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/crates/core/src/query/optic.rs b/crates/core/src/query/optic.rs index 7c7920e3..e51e6907 100644 --- a/crates/core/src/query/optic.rs +++ b/crates/core/src/query/optic.rs @@ -1720,6 +1720,20 @@ mod tests { .webpages; assert_eq!(res.len(), 1); assert_eq!(res[0].url, "https://example.com/test"); + + let res = searcher + .search(&SearchQuery { + query: "example".to_string(), + optic: Some( + Optic::parse("Rule { Matches { Site(\"|example.com|\") }, Action(Discard) }") + .unwrap(), + ), + ..Default::default() + }) + .unwrap() + .webpages; + assert_eq!(res.len(), 1); + assert_eq!(res[0].url, "https://another-example.com/"); } #[test] diff --git a/crates/optics/src/lib.rs b/crates/optics/src/lib.rs index 09bb2d55..d2f7302e 100644 --- a/crates/optics/src/lib.rs +++ b/crates/optics/src/lib.rs @@ -77,9 +77,15 @@ impl TryFrom for Optic { fn try_from(raw: RawOptic) -> Result { let mut rules = Vec::new(); + let mut blocked = Vec::new(); for rule in raw.rules { - rules.push(Rule::try_from(rule)?); + let rule = Rule::try_from(rule)?; + + match rule.as_blocked_site() { + Some(site) => blocked.push(site), + None => rules.push(rule), + } } let mut liked_hosts = Vec::new(); @@ -99,7 +105,7 @@ impl TryFrom for Optic { host_rankings: HostRankings { liked: liked_hosts, disliked: disliked_hosts, - blocked: Vec::new(), // blocked hosts are handled by `$discard` syntax. + blocked, }, }) } @@ -349,6 +355,25 @@ pub struct Rule { /// What action to take if the rule matches. 
pub action: Action,
}
+impl Rule {
+    /// Returns the site to block if the rule is *exactly* of the form
+    /// `Rule { Matches { Site("|...|") }, Action(Discard) }`.
+    ///
+    /// A rule qualifies only when all of the following hold:
+    /// - its action is `Action::Discard`,
+    /// - it has a single `Matches` block containing a single matching,
+    /// - that matching targets `MatchLocation::Site`, and
+    /// - its pattern is precisely `Anchor, Raw(site), Anchor`.
+    ///
+    /// Returns `None` for every other rule. The strictness matters because
+    /// `Optic::try_from` moves a qualifying rule into `HostRankings::blocked`
+    /// instead of keeping it as a rule: accepting a rule with additional
+    /// matchings or pattern parts would silently drop those conditions and
+    /// block the whole site.
+    fn as_blocked_site(&self) -> Option<String> {
+        // Only discard rules with exactly one `Matches` block can be a plain site block.
+        if self.action != Action::Discard || self.matches.len() != 1 {
+            return None;
+        }
+
+        // The block must hold a single matching; further matchings are extra
+        // conditions that a host-level block cannot express.
+        let block = self.matches.first()?;
+        if block.len() != 1 {
+            return None;
+        }
+
+        let matching = block.first()?;
+        if matching.location != MatchLocation::Site || matching.pattern.len() != 3 {
+            return None;
+        }
+
+        // The pattern has to be the fully anchored form `|site|`.
+        match (
+            matching.pattern.first()?,
+            matching.pattern.get(1)?,
+            matching.pattern.get(2)?,
+        ) {
+            (PatternPart::Anchor, PatternPart::Raw(site), PatternPart::Anchor) => {
+                Some(site.clone())
+            }
+            _ => None,
+        }
+    }
+}
impl Display for Rule {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {