Skip to content

Commit

Permalink
Issues mozfr#320: move all our regex logic into a unit tested class
Browse files Browse the repository at this point in the history
  • Loading branch information
pascalchevrel committed Dec 30, 2015
1 parent 1e20764 commit ea8c5cc
Show file tree
Hide file tree
Showing 10 changed files with 445 additions and 79 deletions.
228 changes: 228 additions & 0 deletions app/classes/Transvision/Search.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
<?php
namespace Transvision;

/**
* Search class
*
* Allows searching for data in our repositories using a fluent interface.
* Currently, only the regex part (definition of the search) is implemented.
* ex:
* $search = (new Search)
* ->setSearchTerms('Bookmark this page')
* ->setRegexWholeWords(true)
* ->setRegexCase('sensitive')
* ->setRegexPerfectMatch(false);
*/
class Search
{
    /**
     * The trimmed string searched, we keep that one as the canonical reference
     * @var string
     */
    protected $search_terms;

    /**
     * The generated regex string, rebuilt by updateRegex() after every setter
     * @var string
     */
    protected $regex;

    /**
     * Case modifier of the regex: 'i' (insensitive) or '' (sensitive)
     * @var string
     */
    protected $regex_case;

    /**
     * Word-boundary token wrapped around the searched terms: '\b' when the
     * whole words option is on, '' otherwise
     * @var string
     */
    protected $regex_whole_words;

    /**
     * Only return strings that match the search perfectly (case excluded)
     * @var boolean
     */
    protected $regex_perfect_match;

    /**
     * The search terms for the regex, those differ from $search_terms as
     * they can be changed dynamically via setRegexSearchTerms()
     * @var string
     */
    protected $regex_search_terms;

    /**
     * We set the default values for a search: empty terms, case insensitive,
     * no word boundaries, no perfect match.
     */
    public function __construct()
    {
        $this->search_terms = '';
        $this->regex = '';
        $this->regex_case = 'i';
        $this->regex_whole_words = '';
        $this->regex_perfect_match = false;
        $this->regex_search_terms = '';
    }

    /**
     * Store the trimmed searched string in $search_terms and in
     * $regex_search_terms, then rebuild the regex.
     *
     * @param  string $string String we want to search for
     * @return $this
     */
    public function setSearchTerms($string)
    {
        $this->search_terms = trim($string);
        $this->regex_search_terms = $this->search_terms;
        $this->updateRegex();

        return $this;
    }

    /**
     * Allows setting a new searched term for the regex.
     * This is mostly useful when you have a multi-words search and need to
     * loop through all the words to return results.
     * The string is stored as given (not trimmed) and $search_terms is left
     * untouched.
     *
     * @param  string $string The string we want to update the regex for
     * @return $this
     */
    public function setRegexSearchTerms($string)
    {
        $this->regex_search_terms = $string;
        $this->updateRegex();

        return $this;
    }

    /**
     * Set the regex case sensibility.
     * Any value evaluated to True (e.g. 'sensitive') makes the search case
     * sensitive, i.e. removes the 'i' modifier from the regex.
     *
     * @param  mixed $flag Truthy for a case sensitive search
     * @return $this
     */
    public function setRegexCase($flag)
    {
        $this->regex_case = (bool) $flag ? '' : 'i';
        $this->updateRegex();

        return $this;
    }

    /**
     * Set the regex to only return perfect matches for the searched string.
     * We cast the value to a boolean because we usually get it from a GET.
     *
     * @param  mixed $flag Set to True for a perfect match
     * @return $this
     */
    public function setRegexPerfectMatch($flag)
    {
        $this->regex_perfect_match = (bool) $flag;
        $this->updateRegex();

        return $this;
    }

    /**
     * Set the regex so as that a multi-word search is taken as a single word.
     * We cast the value to a boolean because we usually get it from a GET.
     *
     * @param  mixed $flag A value evaluated to True will add \b to the regex
     * @return $this
     */
    public function setRegexWholeWords($flag)
    {
        $this->regex_whole_words = (bool) $flag ? '\b' : '';
        $this->updateRegex();

        return $this;
    }

    /**
     * Rebuild the $regex value every time a setter to the regex is called.
     *
     * The searched terms are always escaped with preg_quote() because they
     * are literal text, not a pattern: without it, a '.', '(' or the '~'
     * delimiter in a perfect match search would change the meaning of the
     * regex or make it invalid.
     *
     * @return $this
     */
    private function updateRegex()
    {
        $search = preg_quote($this->regex_search_terms, '~');

        if ($this->regex_perfect_match) {
            // Anchors stay inside the optional \b tokens, as before
            $search = '^' . $search . '$';
        }

        $this->regex =
            '~'
            . $this->regex_whole_words
            . $search
            . $this->regex_whole_words
            . '~'
            . $this->regex_case
            . 'u';

        return $this;
    }

    /**
     * Get the regex string
     *
     * @return string The regex
     */
    public function getRegex()
    {
        return $this->regex;
    }

    /**
     * Get the state of regex_perfect_match
     *
     * @return boolean True if the regex searches for a perfect string match
     */
    public function isPerfectMatch()
    {
        return $this->regex_perfect_match;
    }

    /**
     * Get search terms
     *
     * @return string Searched terms
     */
    public function getSearchTerms()
    {
        return $this->search_terms;
    }

    /**
     * Get search terms in regex
     *
     * @return string Searched terms in regex
     */
    public function getRegexSearchTerms()
    {
        return $this->regex_search_terms;
    }

    /**
     * Get the regex case
     *
     * @return string Return 'i' for case insensitive search, '' for sensitive
     */
    public function getRegexCase()
    {
        return $this->regex_case;
    }

    /**
     * Get the regex whole words
     *
     * @return boolean True if we have the 'whole words' option for the regex
     */
    public function isWholeWords()
    {
        // The option is stored as its regex token, expose it as a boolean
        return $this->regex_whole_words !== '';
    }
}
19 changes: 7 additions & 12 deletions app/inc/search_options.php
Original file line number Diff line number Diff line change
Expand Up @@ -40,18 +40,13 @@
// Locales list for the select boxes
$loc_list = Project::getRepositoryLocales($check['repo']);

// Search for perfectMatch
if ($check['perfect_match']) {
$my_search = trim('^' . $my_search . '$');
} else {
$my_search = preg_quote($my_search, '/');
}

// Regex options
$case_sensitive = $check['case_sensitive'] ? '' : 'i';
$whole_word = $check['whole_word'] ? '\b' : '';
$delimiter = '~';
$main_regex = $delimiter . $whole_word . $my_search . $whole_word . $delimiter . $case_sensitive;
// Define our regex
$search = (new Search)
->setSearchTerms(Utils::cleanString($_GET['recherche']))
->setRegexWholeWords($check['whole_word'])
->setRegexCase($check['case_sensitive'])
->setRegexPerfectMatch($check['perfect_match'])
;

// build the repository switcher
$repo_list = Utils::getHtmlSelectOptions($repos_nice_names, $check['repo'], true);
Expand Down
9 changes: 4 additions & 5 deletions app/models/3locales_search.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,13 @@

$tmx_target2 = Utils::getRepoStrings($locale2, $check['repo']);

if ($check['perfect_match']) {
$locale3_strings = preg_grep($regex, $tmx_target2);
if ($search->isPerfectMatch()) {
$locale3_strings = preg_grep($search->getRegex(), $tmx_target2);
} else {
$locale3_strings = $tmx_target2;
foreach (Utils::uniqueWords($initial_search) as $word) {
$regex = $delimiter . $whole_word . preg_quote($word, $delimiter) .
$whole_word . $delimiter . $case_sensitive . 'u';
$locale3_strings = preg_grep($regex, $locale3_strings);
$search->setRegexSearchTerms($word);
$locale3_strings = preg_grep($search->getRegex(), $locale3_strings);
}
}

Expand Down
27 changes: 14 additions & 13 deletions app/models/api/repository_search.php
Original file line number Diff line number Diff line change
Expand Up @@ -21,33 +21,34 @@
$source_strings_merged = [];
$target_strings_merged = [];

// Define our regex
$search = (new Search)
->setSearchTerms(Utils::cleanString($initial_search))
->setRegexWholeWords($get_option('whole_word'))
->setRegexCase($get_option('case_sensitive'))
->setRegexPerfectMatch($get_option('perfect_match'))
;

// We loop through all repositories searched and merge results
foreach ($repositories as $repository) {
$source_strings = Utils::getRepoStrings($request->parameters[4], $repository);

// Regex options
$whole_word = $get_option('whole_word') ? '\b' : '';
$case_sensitive = $get_option('case_sensitive') ? '' : 'i';

if ($get_option('perfect_match')) {
$regex = '~' . $whole_word . trim('^' . preg_quote($initial_search, '~') . '$') .
$whole_word . '~' . $case_sensitive . 'u';
if ($search->isPerfectMatch()) {
if ($request->parameters[2] == 'entities') {
$entities = ShowResults::searchEntities($source_strings, $regex);
$entities = ShowResults::searchEntities($source_strings, $search->getRegex());
$source_strings = array_intersect_key($source_strings, array_flip($entities));
} else {
$source_strings = preg_grep($regex, $source_strings);
$source_strings = preg_grep($search->getRegex(), $source_strings);
$entities = array_keys($source_strings);
}
} else {
foreach (Utils::uniqueWords($initial_search) as $word) {
$regex = '~' . $whole_word . preg_quote($word, '~') .
$whole_word . '~' . $case_sensitive . 'u';
$search->setRegexSearchTerms($word);
if ($request->parameters[2] == 'entities') {
$entities = ShowResults::searchEntities($source_strings, $regex);
$entities = ShowResults::searchEntities($source_strings, $search->getRegex());
$source_strings = array_intersect_key($source_strings, array_flip($entities));
} else {
$source_strings = preg_grep($regex, $source_strings);
$source_strings = preg_grep($search->getRegex(), $source_strings);
$entities = array_keys($source_strings);
}
}
Expand Down
32 changes: 20 additions & 12 deletions app/models/api/suggestions.php
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
<?php
namespace Transvision;

// Closure to use extra parameters
$get_option = function ($option) use ($request) {
if (isset($request->extra_parameters[$option])) {
return $request->extra_parameters[$option];
}

return false;
};

$repositories = ($request->parameters[2] == 'global')
? Project::getRepositories()
: [$request->parameters[2]];
Expand All @@ -12,28 +21,27 @@
$initial_search = Utils::cleanString($request->parameters[5]);
$terms = Utils::uniqueWords($initial_search);

// Regex options (not currently used)
$delimiter = '~';
$whole_word = isset($check['whole_word']) ? '\b' : '';
$case_sensitive = isset($check['case_sensitive']) ? '' : 'i';
$regex = $delimiter . $whole_word . $initial_search . $whole_word .
$delimiter . $case_sensitive . 'u';
// Define our regex
$search = (new Search)
->setSearchTerms(Utils::cleanString($initial_search))
->setRegexWholeWords($get_option('whole_word'))
->setRegexCase($get_option('case_sensitive'))
->setRegexPerfectMatch($get_option('perfect_match'))
;

// Loop through all repositories searching in both source and target languages
foreach ($repositories as $repository) {
$source_strings = Utils::getRepoStrings($request->parameters[3], $repository);
foreach ($terms as $word) {
$regex = $delimiter . $whole_word . preg_quote($word, $delimiter) .
$whole_word . $delimiter . $case_sensitive . 'u';
$source_strings = preg_grep($regex, $source_strings);
$search->setRegexSearchTerms($word);
$source_strings = preg_grep($search->getRegex(), $source_strings);
}
$source_strings_merged = array_merge($source_strings, $source_strings_merged);

$target_strings = Utils::getRepoStrings($request->parameters[4], $repository);
foreach ($terms as $word) {
$regex = $delimiter . $whole_word . preg_quote($word, $delimiter) .
$whole_word . $delimiter . $case_sensitive . 'u';
$target_strings = preg_grep($regex, $target_strings);
$search->setRegexSearchTerms($word);
$target_strings = preg_grep($search->getRegex(), $target_strings);
}
$target_strings_merged = array_merge($target_strings, $target_strings_merged);
}
Expand Down
Loading

0 comments on commit ea8c5cc

Please sign in to comment.