Skip to content

Commit

Permalink
Merge pull request #3 from snipsco/task/boundaries-check
Browse files Browse the repository at this point in the history
Boundaries check
  • Loading branch information
hdlj authored Jul 3, 2017
2 parents e48f5aa + 962c5a0 commit e8b6d15
Show file tree
Hide file tree
Showing 6 changed files with 204 additions and 85 deletions.
12 changes: 8 additions & 4 deletions core/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,23 @@ use cell;

use {CoreResult, Pattern, RuleSet, Sym, SymbolTable, NodePayload};
use pattern;
use helpers::BoundariesChecker;
use rule::{Rule, Rule1, Rule2, Rule3, Rule4, Rule5, Rule6, RuleProductionArg};

use rule::rule_errors::*;

/// Builder that collects the symbols and rules later turned into a `RuleSet`
/// by `build`.
pub struct RuleSetBuilder<StashValue: NodePayload> {
// Symbol table, kept in a `RefCell` (interior mutability); `reg` and
// `reg_neg_lh` reach it through `self.sym(..)` while only holding `&self`.
symbols: cell::RefCell<SymbolTable>,
// Boxed rules registered so far, also behind a `RefCell`.
rules: cell::RefCell<Vec<Box<Rule<StashValue>>>>,
// Boundary-checking strategy handed to every text pattern created by
// `reg` and `reg_neg_lh`.
boundaries_checker: BoundariesChecker,
}

impl<StashValue: NodePayload> Default for RuleSetBuilder<StashValue> {
fn default() -> RuleSetBuilder<StashValue> {
impl<StashValue: NodePayload> RuleSetBuilder<StashValue> {
pub fn new(boundaries_checker: BoundariesChecker) -> RuleSetBuilder<StashValue> {
RuleSetBuilder {
symbols: cell::RefCell::new(SymbolTable::default()),
rules: cell::RefCell::new(vec![]),
boundaries_checker,
}
}
}
Expand Down Expand Up @@ -147,14 +150,15 @@ impl<StashValue: NodePayload> RuleSetBuilder<StashValue> {
}

pub fn reg(&self, regex:&str) -> CoreResult<pattern::TextPattern<StashValue>> {
Ok(pattern::TextPattern::new(::regex::Regex::new(regex)?, self.sym(regex)))
Ok(pattern::TextPattern::new(::regex::Regex::new(regex)?, self.sym(regex), self.boundaries_checker))
}

pub fn reg_neg_lh(&self, regex:&str, neg_lh:&str) -> CoreResult<pattern::TextNegLHPattern<StashValue>> {
Ok(pattern::TextNegLHPattern::new(
::regex::Regex::new(regex)?,
::regex::Regex::new(neg_lh)?,
self.sym(format!("{}(?:{})", regex, neg_lh))))
self.sym(format!("{}(?:{})", regex, neg_lh)),
self.boundaries_checker))
}

pub fn build(self) -> RuleSet<StashValue> {
Expand Down
155 changes: 155 additions & 0 deletions core/src/helpers.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
use range::Range;

/// Strategy deciding whether a byte range of a sentence starts and ends on
/// acceptable boundaries (see `check`).
///
/// Every strategy compares the class of the character just inside the range
/// with the class of the character just outside of it, on both edges.
#[derive(Copy,Clone, Debug, PartialEq)]
pub enum BoundariesChecker {
/// Both edges must sit on a change of class, where all alphanumeric
/// characters share a single class and any other character is its own class.
///
/// NOTE(review): "Sperated" looks like a typo for "Separated"; the name is
/// public API, so it is left untouched here.
SperatedAlphanumericWord,
/// Each edge is accepted either when both neighbouring characters fall in the
/// same alphabetic / non-alphabetic class, or when their detailed classes
/// (uppercase, lowercase, decimal digit, other) differ.
ComposedWordOrDetailed,
/// Both edges must sit on a change of detailed class (uppercase, lowercase,
/// decimal digit, or the character itself for anything else).
Detailed,
}

impl BoundariesChecker {
pub fn check(&self, sentence: &str, range: Range) -> bool {
match self {
&BoundariesChecker::SperatedAlphanumericWord => {
left_valid_boundaries(sentence, range, ValidBoundariesOption::OnCharClassChange, &alphanumeric_class)
&& right_valid_boundaries(sentence, range, ValidBoundariesOption::OnCharClassChange, &alphanumeric_class)
},
&BoundariesChecker::ComposedWordOrDetailed => {
(
left_valid_boundaries(sentence, range, ValidBoundariesOption::OnSameCharClass, &composed_word_class)
|| left_valid_boundaries(sentence, range, ValidBoundariesOption::OnCharClassChange, &detailed_class)
) && (
right_valid_boundaries(sentence, range, ValidBoundariesOption::OnSameCharClass, &composed_word_class)
|| right_valid_boundaries(sentence, range, ValidBoundariesOption::OnCharClassChange, &detailed_class)
)
},
&BoundariesChecker::Detailed => {
left_valid_boundaries(sentence, range, ValidBoundariesOption::OnCharClassChange, &detailed_class)
&& right_valid_boundaries(sentence, range, ValidBoundariesOption::OnCharClassChange, &detailed_class)
}
}
}
}

/// How the classes of the two characters around a range edge are compared by
/// `left_valid_boundaries` and `right_valid_boundaries`.
enum ValidBoundariesOption {
/// The edge is valid when the two classes differ.
OnCharClassChange,
/// The edge is valid when the two classes are equal.
OnSameCharClass,
}

/// Two-class classifier: `'A'` for alphabetic characters, `'O'` for
/// everything else.
fn composed_word_class(c: char) -> char {
    match c {
        letter if letter.is_alphabetic() => 'A',
        _ => 'O',
    }
}

/// Maps every alphanumeric character to the shared class `'A'`; any other
/// character is returned unchanged and therefore forms its own class.
fn alphanumeric_class(c: char) -> char {
    match c {
        word_char if word_char.is_alphanumeric() => 'A',
        other => other,
    }
}

/// Fine-grained classifier: `'u'` for uppercase, `'l'` for lowercase, `'d'`
/// for the decimal digits recognised by `char::is_digit(10)`; any other
/// character is returned unchanged and forms its own class.
fn detailed_class(c: char) -> char {
    match c {
        upper if upper.is_uppercase() => 'u',
        lower if lower.is_lowercase() => 'l',
        digit if digit.is_digit(10) => 'd',
        other => other,
    }
}

/// Checks the right edge of `range` within `sentence`.
///
/// The class of the last character inside the range is compared with the
/// class of the first character following it (`None` when there is no such
/// character). `option` selects whether the edge is valid when those classes
/// differ or when they are equal.
///
/// # Panics
///
/// Panics when `range` is out of bounds for `sentence` or does not fall on
/// UTF-8 character boundaries.
fn right_valid_boundaries<CharClass>(sentence: &str, range: Range, option: ValidBoundariesOption, char_class: &CharClass) -> bool
    where CharClass: Fn(char) -> char
{
    let matched = &sentence[range.0..range.1];
    let inner_class = matched.chars().next_back().map(char_class);

    let remainder = &sentence[range.1..];
    let outer_class = remainder.chars().next().map(char_class);

    let same_class = inner_class == outer_class;
    match option {
        ValidBoundariesOption::OnCharClassChange => !same_class,
        ValidBoundariesOption::OnSameCharClass => same_class,
    }
}

/// Checks the left edge of `range` within `sentence`.
///
/// The class of the first character inside the range is compared with the
/// class of the last character preceding it (`None` when there is no such
/// character). `option` selects whether the edge is valid when those classes
/// differ or when they are equal.
///
/// # Panics
///
/// Panics when `range` is out of bounds for `sentence` or does not fall on
/// UTF-8 character boundaries.
fn left_valid_boundaries<CharClass>(sentence: &str, range: Range, option: ValidBoundariesOption, char_class: &CharClass) -> bool
    where CharClass: Fn(char) -> char
{
    let matched = &sentence[range.0..range.1];
    let inner_class = matched.chars().next().map(char_class);

    let prefix = &sentence[..range.0];
    let outer_class = prefix.chars().next_back().map(char_class);

    let same_class = inner_class == outer_class;
    match option {
        ValidBoundariesOption::OnCharClassChange => !same_class,
        ValidBoundariesOption::OnSameCharClass => same_class,
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Spans shared by every test: `(sentence, range start, range end)`.
    /// Offsets are byte offsets, so "é" and "ç" take two bytes each; the
    /// trailing comment shows the text actually covered by the range.
    const SPANS: [(&'static str, usize, usize); 12] = [
        ("abc def ret", 4, 7),    // "def"
        ("abc def ret", 2, 8),    // "c def "
        ("abc def123 ret", 4, 7), // "def"
        ("def123 ret", 0, 6),     // "def123"
        ("def123 ret", 0, 3),     // "def"
        ("ret def", 4, 7),        // "def"
        ("ret 123def", 7, 10),    // "def"
        ("aéc def ret", 3, 9),    // "c def "
        ("aec def rét", 2, 8),    // "c def "
        ("aec déf ret", 2, 9),    // "c déf "
        ("aeç def ret", 2, 9),    // "ç def "
        ("aeç def ret", 4, 8),    // " def"
    ];

    /// Checks `checker` against `SPANS`, `expected[i]` being the result
    /// wanted for `SPANS[i]`.
    fn assert_expectations(checker: BoundariesChecker, expected: [bool; 12]) {
        for (&(sentence, start, end), &wanted) in SPANS.iter().zip(expected.iter()) {
            assert_eq!(wanted,
                       checker.check(sentence, Range(start, end)),
                       "{:?} on {:?}, bytes {}..{}",
                       checker,
                       sentence,
                       start,
                       end);
        }
    }

    #[test]
    fn test_valid_boundaries_alphanumeric() {
        assert_expectations(BoundariesChecker::SperatedAlphanumericWord,
                            [true, false, false, true, false, true,
                             false, false, false, false, false, true]);
    }

    #[test]
    fn test_valid_boundaries_composed_word_or_detailed() {
        assert_expectations(BoundariesChecker::ComposedWordOrDetailed,
                            [true, true, true, true, true, true,
                             true, true, true, true, true, true]);
    }

    #[test]
    fn test_valid_boundaries_detailed() {
        assert_expectations(BoundariesChecker::Detailed,
                            [true, false, true, true, true, true,
                             true, false, false, false, false, true]);
    }
}
63 changes: 4 additions & 59 deletions core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@ pub mod pattern;
pub mod rule;
mod builder;
mod range;
mod helpers;

use rule::Rule;
use pattern::Pattern;
pub use range::Range;
pub use rule::rule_errors::*;
pub use builder::RuleSetBuilder;
pub use helpers::BoundariesChecker;

use errors::*;
pub mod errors {
Expand Down Expand Up @@ -155,48 +157,14 @@ impl<StashValue: NodePayload> RuleSet<StashValue> {
}
previous_stash_size = stash.len();
}
Ok(stash.into_iter().filter(|pn| valid_boundaries(sentence, pn.root_node.byte_range, &alphanumeric_class)).collect())
Ok(stash.into_iter().filter(|pn| BoundariesChecker::SperatedAlphanumericWord.check(sentence, pn.root_node.byte_range)).collect())
}

pub fn resolve_sym(&self, sym:&Sym) -> Option<&str> {
self.symbols.0.resolve(*sym)
}
}

/// Collapses every alphanumeric character into the class `'A'`; any other
/// character is its own class.
fn alphanumeric_class(c: char) -> char {
    if !c.is_alphanumeric() {
        return c;
    }
    'A'
}

/// Classifies a character as uppercase (`'u'`), lowercase (`'l'`) or decimal
/// digit (`'d'`); anything else is returned as-is and is its own class.
fn detailed_class(c: char) -> char {
    if c.is_uppercase() {
        return 'u';
    }
    if c.is_lowercase() {
        return 'l';
    }
    if c.is_digit(10) {
        return 'd';
    }
    c
}

/// Tells whether `range` (byte offsets into `sentence`) is delimited by a
/// change of character class on both of its edges.
///
/// `char_class` maps each character to its class. The first character of the
/// range must differ in class from the character preceding it, and the last
/// character of the range must differ in class from the character following
/// it; a missing neighbour is represented by `None`.
///
/// # Panics
///
/// Panics when `range` is out of bounds for `sentence` or does not fall on
/// UTF-8 character boundaries.
fn valid_boundaries<CharClass>(sentence: &str, range: Range, char_class: &CharClass) -> bool
    where CharClass: Fn(char) -> char
{
    let matched = &sentence[range.0..range.1];
    let head_class = matched.chars().next().map(char_class);
    let tail_class = matched.chars().next_back().map(char_class);

    let preceding_class = sentence[..range.0].chars().next_back().map(char_class);
    let following_class = sentence[range.1..].chars().next().map(char_class);

    // Invalid as soon as one edge stays within the same class.
    !(head_class == preceding_class || tail_class == following_class)
}


#[derive(Copy,Clone, Debug, PartialEq)]
pub struct SendSyncPhantomData<T>(::std::marker::PhantomData<T>);
unsafe impl<T> Send for SendSyncPhantomData<T> {}
Expand All @@ -205,27 +173,4 @@ impl<T> SendSyncPhantomData<T> {
pub fn new() -> SendSyncPhantomData<T> {
SendSyncPhantomData(::std::marker::PhantomData)
}
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises `valid_boundaries` with a classifier that merges all
    /// alphanumeric characters into a single class.
    #[test]
    fn test_valid_boundaries() {
        let an = |c: char| if c.is_alphanumeric() { 'A' } else { c };

        // (sentence, range start, range end, expected); offsets are byte
        // offsets and the trailing comment shows the text covered by the range.
        let cases = [
            ("abc def ret", 4, 7, true),     // "def"
            ("abc def ret", 2, 8, false),    // "c def "
            ("abc def123 ret", 4, 7, false), // "def"
            ("def123 ret", 0, 6, true),      // "def123"
            ("def123 ret", 0, 3, false),     // "def"
            ("ret def", 4, 7, true),         // "def"
            ("ret 123def", 7, 10, false),    // "def"
            ("aéc def ret", 3, 9, false),    // "c def "
            ("aec def rét", 2, 8, false),    // "c def "
            ("aec déf ret", 2, 9, false),    // "c déf "
            ("aeç def ret", 2, 9, false),    // "ç def "
            ("aeç def ret", 4, 8, true),     // " def"
        ];

        for &(sentence, start, end, expected) in cases.iter() {
            assert_eq!(expected,
                       valid_boundaries(sentence, Range(start, end), &an),
                       "sentence {:?}, bytes {}..{}",
                       sentence,
                       start,
                       end);
        }
    }
}
}
Loading

0 comments on commit e8b6d15

Please sign in to comment.