From ee6df77efe27601b44f1541063c5020965e56b13 Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Mon, 9 Dec 2024 10:39:23 +0000 Subject: [PATCH 01/20] boost for first letter matches --- src/textual/fuzzy.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/textual/fuzzy.py b/src/textual/fuzzy.py index 1f88feb742..776db99cb9 100644 --- a/src/textual/fuzzy.py +++ b/src/textual/fuzzy.py @@ -40,6 +40,10 @@ def __init__( ".*?".join(f"({escape(character)})" for character in query), flags=0 if case_sensitive else IGNORECASE, ) + self._first_word_regex = compile( + ".*?".join(f"(\\b{escape(character)})" for character in query), + flags=0 if case_sensitive else IGNORECASE, + ) self._cache: LRUCache[str, float] = LRUCache(1024 * 4) @property @@ -90,6 +94,11 @@ def match(self, candidate: str) -> float: last_offset = offset score = 1.0 - ((group_count - 1) / len(candidate)) + + if first_words := self._first_word_regex.match(candidate): + multiplier = len(first_words.groups()) + # boost if the query matches first words + score *= multiplier self._cache[candidate] = score return score From ecf3fc2b59da2137454592e6ac08e8d84172e609 Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Mon, 9 Dec 2024 10:48:12 +0000 Subject: [PATCH 02/20] test --- tests/test_fuzzy.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/test_fuzzy.py b/tests/test_fuzzy.py index d2ab460c9a..36f011d320 100644 --- a/tests/test_fuzzy.py +++ b/tests/test_fuzzy.py @@ -25,6 +25,13 @@ def test_match(): assert matcher.match("foo .ba egg r") == 1.0 - 2 / 13 +def test_boosted_matches(): + matcher = Matcher("ss") + + # First word matchers should score higher + assert matcher.match("Save Screenshot") > matcher.match("Show Keys abcde") + + def test_highlight(): matcher = Matcher("foo.bar") From d97ee3aab45c1b7eee4061cff8e8d59453d70932 Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Mon, 9 Dec 2024 11:21:27 +0000 Subject: [PATCH 03/20] wip --- src/textual/fuzzy.py | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/src/textual/fuzzy.py b/src/textual/fuzzy.py index 776db99cb9..1f42f8ad3c 100644 --- a/src/textual/fuzzy.py +++ b/src/textual/fuzzy.py @@ -36,14 +36,19 @@ def __init__( """ self._query = query self._match_style = Style(reverse=True) if match_style is None else match_style + self._case_sensitive = case_sensitive self._query_regex = compile( ".*?".join(f"({escape(character)})" for character in query), flags=0 if case_sensitive else IGNORECASE, ) + _first_word_regex = ".*?".join( + f"(\\b{escape(character)})" for character in query + ) self._first_word_regex = compile( - ".*?".join(f"(\\b{escape(character)})" for character in query), + _first_word_regex, flags=0 if case_sensitive else IGNORECASE, ) + self._cache: LRUCache[str, float] = LRUCache(1024 * 4) @property @@ -64,7 +69,7 @@ def query_pattern(self) -> str: @property def case_sensitive(self) -> bool: """Is this matcher case sensitive?""" - return not bool(self._query_regex.flags & IGNORECASE) + return self._case_sensitive def match(self, candidate: str) -> float: """Match the candidate against the query. @@ -83,6 +88,7 @@ def match(self, candidate: str) -> float: score = 0.0 else: assert match.lastindex is not None + multiplier = 1.0 offsets = [ match.span(group_no)[0] for group_no in range(1, match.lastindex + 1) ] @@ -95,10 +101,11 @@ def match(self, candidate: str) -> float: score = 1.0 - ((group_count - 1) / len(candidate)) - if first_words := self._first_word_regex.match(candidate): - multiplier = len(first_words.groups()) + if first_words := self._first_word_regex.search(candidate): + multiplier = len(first_words.groups()) + 1 # boost if the query matches first words - score *= multiplier + + score *= multiplier self._cache[candidate] = score return score @@ -111,20 +118,18 @@ def highlight(self, candidate: str) -> Text: Returns: A [rich.text.Text][`Text`] object with highlighted matches. """ - match = self._query_regex.search(candidate) text = Text.from_markup(candidate) + match = self._first_word_regex.search(candidate) + if match is None: + match = self._query_regex.search(candidate) + if match is None: return text assert match.lastindex is not None - if self._query in text.plain: - # Favor complete matches - offset = text.plain.index(self._query) - text.stylize(self._match_style, offset, offset + len(self._query)) - else: - offsets = [ - match.span(group_no)[0] for group_no in range(1, match.lastindex + 1) - ] - for offset in offsets: - text.stylize(self._match_style, offset, offset + 1) + offsets = [ + match.span(group_no)[0] for group_no in range(1, match.lastindex + 1) + ] + for offset in offsets: + text.stylize(self._match_style, offset, offset + 1) return text From 6047c15f800574d0b0eb1b2a2b45df9148d47c6f Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Mon, 9 Dec 2024 20:47:17 +0000 Subject: [PATCH 04/20] match algorithm --- src/textual/fuzzy.py | 88 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 87 insertions(+), 1 deletion(-) diff --git a/src/textual/fuzzy.py b/src/textual/fuzzy.py index 1f42f8ad3c..0436c86061 100644 --- a/src/textual/fuzzy.py +++ b/src/textual/fuzzy.py @@ -7,7 +7,9 @@ from __future__ import annotations -from re import IGNORECASE, compile, escape +from dataclasses import dataclass, field +from re import IGNORECASE, compile, escape, finditer +from typing import Iterable import rich.repr from rich.style import Style @@ -16,6 +18,78 @@ from textual.cache import LRUCache +@dataclass +class FuzzyMatch: + candidate_offset: int = 0 + query_offset: int = 0 + offsets: list[int] = field(default_factory=list) + + def advance(self, new_offset: int) -> FuzzyMatch: + return FuzzyMatch(new_offset, self.query_offset, self.offsets.copy()) + + +def match( + query: str, candidate: str, case_sensitive: bool = False +) -> Iterable[list[int]]: + if not case_sensitive: + query = query.lower() + candidate = candidate.lower() + + query_letters: list[tuple[float, int, str]] = [] + for word_match in finditer(r"\w+", candidate): + start, end = word_match.span() + + query_letters.extend( + [ + (True, start, candidate[start]), + *[ + (False, offset, candidate[offset]) + for offset in range(start + 1, end) + ], + ] + ) + + stack: list[FuzzyMatch] = [FuzzyMatch(0, 0, [])] + + while stack: + match = stack[-1] + + if match.candidate_offset >= len(candidate) or match.query_offset >= len(query): + stack.pop() + continue + + try: + offset = candidate.index(query[match.query_offset], match.candidate_offset) + except ValueError: + # Current math was unsuccessful + stack.pop() + continue + + advance_match = match.advance(offset + 1) + match.offsets.append(offset) + match.candidate_offset = offset + 1 + match.query_offset += 1 + + if match.query_offset == len(query): + # Full match + yield match.offsets.copy() + stack.pop() + + stack.append(advance_match) + + # else: + # match.candidate_offset += 1 + # stack.append(match_copy) + + # if query[match.query_offset] == candidate[match.candidate_offset]: + # match_copy = match.copy() + # match_copy.query_offset += 1 + # if match_copy.query_offset == len(query): + # yield match_copy.offsets.copy() + # match_copy.offsets.append(match.candidate_offset) + # stack.append(match_copy) + + @rich.repr.auto class Matcher: """A fuzzy matcher.""" @@ -133,3 +207,15 @@ def highlight(self, candidate: str) -> Text: text.stylize(self._match_style, offset, offset + 1) return text + + +if __name__ == "__main__": + TEST = "Save Screenshot" + from rich import print + from rich.text import Text + + for offsets in match("ee", TEST): + text = Text(TEST) + for offset in offsets: + text.stylize("reverse", offset, offset + 1) + print(text) From 27264c709d6f2d9bff4e6c2f6270b6a1c5e836fb Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Mon, 9 Dec 2024 21:11:25 +0000 Subject: [PATCH 05/20] beautiful now --- src/textual/fuzzy.py | 73 ++++++++++++++++---------------------------- 1 file changed, 27 insertions(+), 46 deletions(-) diff --git a/src/textual/fuzzy.py b/src/textual/fuzzy.py index 0436c86061..2b892376a2 100644 --- a/src/textual/fuzzy.py +++ b/src/textual/fuzzy.py @@ -7,9 +7,8 @@ from __future__ import annotations -from dataclasses import dataclass, field from re import IGNORECASE, compile, escape, finditer -from typing import Iterable +from typing import Iterable, NamedTuple import rich.repr from rich.style import Style @@ -18,19 +17,21 @@ from textual.cache import LRUCache -@dataclass -class FuzzyMatch: +class Search(NamedTuple): candidate_offset: int = 0 query_offset: int = 0 - offsets: list[int] = field(default_factory=list) + offsets: tuple[int, ...] = () - def advance(self, new_offset: int) -> FuzzyMatch: - return FuzzyMatch(new_offset, self.query_offset, self.offsets.copy()) + def branch(self, offset: int) -> tuple[Search, Search]: + return ( + Search(offset + 1, self.query_offset + 1, self.offsets + (offset,)), + Search(offset + 1, self.query_offset, self.offsets), + ) def match( query: str, candidate: str, case_sensitive: bool = False -) -> Iterable[list[int]]: +) -> Iterable[tuple[int, ...]]: if not case_sensitive: query = query.lower() candidate = candidate.lower() @@ -49,45 +50,25 @@ def match( ] ) - stack: list[FuzzyMatch] = [FuzzyMatch(0, 0, [])] + stack: list[Search] = [Search()] + push = stack.append + pop = stack.pop + query_size = len(query) while stack: - match = stack[-1] - - if match.candidate_offset >= len(candidate) or match.query_offset >= len(query): - stack.pop() - continue - - try: - offset = candidate.index(query[match.query_offset], match.candidate_offset) - except ValueError: - # Current math was unsuccessful - stack.pop() - continue - - advance_match = match.advance(offset + 1) - match.offsets.append(offset) - match.candidate_offset = offset + 1 - match.query_offset += 1 - - if match.query_offset == len(query): - # Full match - yield match.offsets.copy() - stack.pop() - - stack.append(advance_match) - - # else: - # match.candidate_offset += 1 - # stack.append(match_copy) - - # if query[match.query_offset] == candidate[match.candidate_offset]: - # match_copy = match.copy() - # match_copy.query_offset += 1 - # if match_copy.query_offset == len(query): - # yield match_copy.offsets.copy() - # match_copy.offsets.append(match.candidate_offset) - # stack.append(match_copy) + search = stack[-1] + offset = candidate.find( + query[search.query_offset], + search.candidate_offset, + ) + if offset == -1: + pop() + else: + advance_branch, stack[-1] = search.branch(offset) + if advance_branch.query_offset == query_size: + yield advance_branch.offsets + else: + push(advance_branch) @rich.repr.auto @@ -214,7 +195,7 @@ def highlight(self, candidate: str) -> Text: from rich import print from rich.text import Text - for offsets in match("ee", TEST): + for offsets in match("shot", TEST): text = Text(TEST) for offset in offsets: text.stylize("reverse", offset, offset + 1) From 181ddb99325d98547a1087a6cd52049012d76649 Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Mon, 9 Dec 2024 21:12:49 +0000 Subject: [PATCH 06/20] simplify --- src/textual/fuzzy.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/textual/fuzzy.py b/src/textual/fuzzy.py index 2b892376a2..5f03ada344 100644 --- a/src/textual/fuzzy.py +++ b/src/textual/fuzzy.py @@ -23,9 +23,10 @@ class Search(NamedTuple): offsets: tuple[int, ...] = () def branch(self, offset: int) -> tuple[Search, Search]: + _, query_offset, offsets = self return ( - Search(offset + 1, self.query_offset + 1, self.offsets + (offset,)), - Search(offset + 1, self.query_offset, self.offsets), + Search(offset + 1, query_offset + 1, offsets + (offset,)), + Search(offset + 1, query_offset, offsets), ) From ce84348e5b89f4b251cdfeb9bffd0e6f2616175a Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Mon, 9 Dec 2024 21:16:30 +0000 Subject: [PATCH 07/20] simplify --- src/textual/fuzzy.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/textual/fuzzy.py b/src/textual/fuzzy.py index 5f03ada344..69c89a22e7 100644 --- a/src/textual/fuzzy.py +++ b/src/textual/fuzzy.py @@ -55,13 +55,11 @@ def match( push = stack.append pop = stack.pop query_size = len(query) + find = candidate.find while stack: search = stack[-1] - offset = candidate.find( - query[search.query_offset], - search.candidate_offset, - ) + offset = find(query[search.query_offset], search.candidate_offset) if offset == -1: pop() else: From 3267a9cc93dabd92d90b67eb39f831803c5e810b Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Tue, 10 Dec 2024 11:19:41 +0000 Subject: [PATCH 08/20] docstrings --- src/textual/fuzzy.py | 228 ++++++++++++++++++++++++------------------- 1 file changed, 128 insertions(+), 100 deletions(-) diff --git a/src/textual/fuzzy.py b/src/textual/fuzzy.py index 69c89a22e7..362d8f341e 100644 --- a/src/textual/fuzzy.py +++ b/src/textual/fuzzy.py @@ -7,67 +7,143 @@ from __future__ import annotations -from re import IGNORECASE, compile, escape, finditer +from operator import itemgetter +from re import finditer from typing import Iterable, NamedTuple import rich.repr from rich.style import Style from rich.text import Text -from textual.cache import LRUCache +class _Search(NamedTuple): + """Internal structure to keep track of a recursive search.""" -class Search(NamedTuple): candidate_offset: int = 0 query_offset: int = 0 offsets: tuple[int, ...] = () - def branch(self, offset: int) -> tuple[Search, Search]: + def branch(self, offset: int) -> tuple[_Search, _Search]: + """Branch this search when an offset is found. + + Args: + offset: Offset of a matching letter in the query. + + Returns: + A pair of search objects. + """ _, query_offset, offsets = self return ( - Search(offset + 1, query_offset + 1, offsets + (offset,)), - Search(offset + 1, query_offset, offsets), + _Search(offset + 1, query_offset + 1, offsets + (offset,)), + _Search(offset + 1, query_offset, offsets), ) + @property + def groups(self) -> int: + """Number of groups in offsets.""" + groups = 1 + last_offset = self.offsets[0] + for offset in self.offsets[1:]: + if offset != last_offset + 1: + groups += 1 + last_offset = offset + return groups -def match( - query: str, candidate: str, case_sensitive: bool = False -) -> Iterable[tuple[int, ...]]: - if not case_sensitive: - query = query.lower() - candidate = candidate.lower() - - query_letters: list[tuple[float, int, str]] = [] - for word_match in finditer(r"\w+", candidate): - start, end = word_match.span() - - query_letters.extend( - [ - (True, start, candidate[start]), - *[ - (False, offset, candidate[offset]) - for offset in range(start + 1, end) - ], - ] - ) - stack: list[Search] = [Search()] - push = stack.append - pop = stack.pop - query_size = len(query) - find = candidate.find - - while stack: - search = stack[-1] - offset = find(query[search.query_offset], search.candidate_offset) - if offset == -1: - pop() +class FuzzySearch: + """Performs a fuzzy search. + + Unlike a regex solution, this will finds all possible matches. + """ + + def __init__(self, case_sensitive: bool = False) -> None: + """_summary_ + + Args: + case_sensitive: Is the match case sensitive? + """ + self.cache: dict[tuple[str, str, bool], tuple[float, tuple[int, ...]]] = {} + self.case_sensitive = case_sensitive + + def match(self, query: str, candidate: str) -> tuple[float, tuple[int, ...]]: + """Match against a query. + + Args: + query: The fuzzy query. + candidate: A candidate to check,. + + Returns: + A pair of (score, tuple of offsets). `(0, ())` for no result. + """ + cache_key = (query, candidate, self.case_sensitive) + if cache_key in self.cache: + return self.cache[cache_key] + + matches = sorted(self._match(query, candidate), key=itemgetter(0)) + result: tuple[float, tuple[int, ...]] + if not matches: + result = (0.0, ()) else: - advance_branch, stack[-1] = search.branch(offset) - if advance_branch.query_offset == query_size: - yield advance_branch.offsets - else: - push(advance_branch) + result = matches[-1] + self.cache[cache_key] = result + return result + + def _match( + self, query: str, candidate: str + ) -> Iterable[tuple[float, tuple[int, ...]]]: + """Generator to do the matching. + + Args: + query: Query to match. + candidate: Candidate to check against. + + Yields: + Pairs of score and tuple of offsets. + """ + if not self.case_sensitive: + query = query.lower() + candidate = candidate.lower() + + # We need this to give a bonus to first letters. + first_letters = {match.start() for match in finditer(r"\w+", candidate)} + + def score(search: _Search) -> float: + """Sore a search. + + Args: + search: Search object. + + Returns: + Score. + + """ + # This is a heuristic, and can be tweaked for better results + # 2 points for a first letter, 1.0 for other letters + score: float = sum( + (2.0 if offset in first_letters else 1.0) for offset in search.offsets + ) + # Divide by the number of groups + # 1 group no change, 2 groups score is halved etc + score /= search.groups + return score + + stack: list[_Search] = [_Search()] + push = stack.append + pop = stack.pop + query_size = len(query) + find = candidate.find + + while stack: + search = pop() + offset = find(query[search.query_offset], search.candidate_offset) + if offset != -1: + advance_branch, branch = search.branch(offset) + if advance_branch.query_offset == query_size: + yield score(advance_branch), advance_branch.offsets + push(branch) + else: + push(advance_branch) + push(branch) @rich.repr.auto @@ -91,19 +167,7 @@ def __init__( self._query = query self._match_style = Style(reverse=True) if match_style is None else match_style self._case_sensitive = case_sensitive - self._query_regex = compile( - ".*?".join(f"({escape(character)})" for character in query), - flags=0 if case_sensitive else IGNORECASE, - ) - _first_word_regex = ".*?".join( - f"(\\b{escape(character)})" for character in query - ) - self._first_word_regex = compile( - _first_word_regex, - flags=0 if case_sensitive else IGNORECASE, - ) - - self._cache: LRUCache[str, float] = LRUCache(1024 * 4) + self.fuzzy_search = FuzzySearch() @property def query(self) -> str: @@ -115,11 +179,6 @@ def match_style(self) -> Style: """The style that will be used to highlight hits in the matched text.""" return self._match_style - @property - def query_pattern(self) -> str: - """The regular expression pattern built from the query.""" - return self._query_regex.pattern - @property def case_sensitive(self) -> bool: """Is this matcher case sensitive?""" @@ -134,34 +193,7 @@ def match(self, candidate: str) -> float: Returns: Strength of the match from 0 to 1. """ - cached = self._cache.get(candidate) - if cached is not None: - return cached - match = self._query_regex.search(candidate) - if match is None: - score = 0.0 - else: - assert match.lastindex is not None - multiplier = 1.0 - offsets = [ - match.span(group_no)[0] for group_no in range(1, match.lastindex + 1) - ] - group_count = 0 - last_offset = -2 - for offset in offsets: - if offset > last_offset + 1: - group_count += 1 - last_offset = offset - - score = 1.0 - ((group_count - 1) / len(candidate)) - - if first_words := self._first_word_regex.search(candidate): - multiplier = len(first_words.groups()) + 1 - # boost if the query matches first words - - score *= multiplier - self._cache[candidate] = score - return score + return self.fuzzy_search.match(self.query, candidate)[0] def highlight(self, candidate: str) -> Text: """Highlight the candidate with the fuzzy match. @@ -173,29 +205,25 @@ def highlight(self, candidate: str) -> Text: A [rich.text.Text][`Text`] object with highlighted matches. """ text = Text.from_markup(candidate) - match = self._first_word_regex.search(candidate) - if match is None: - match = self._query_regex.search(candidate) - - if match is None: + score, offsets = self.fuzzy_search.match(self.query, candidate) + if not score: return text - assert match.lastindex is not None - offsets = [ - match.span(group_no)[0] for group_no in range(1, match.lastindex + 1) - ] for offset in offsets: text.stylize(self._match_style, offset, offset + 1) - return text if __name__ == "__main__": - TEST = "Save Screenshot" + TEST = "Savess Screens shot" from rich import print from rich.text import Text - for offsets in match("shot", TEST): + fuzzy = FuzzySearch() + + for score, offsets in fuzzy._match("ss", TEST): text = Text(TEST) for offset in offsets: text.stylize("reverse", offset, offset + 1) + print("--") + print(score) print(text) From 6153e70f6f1be94e7399393c8a1ac1cbef02dc5f Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Tue, 10 Dec 2024 11:31:21 +0000 Subject: [PATCH 09/20] tests --- src/textual/fuzzy.py | 2 +- tests/test_fuzzy.py | 26 +++++++++----------------- 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/src/textual/fuzzy.py b/src/textual/fuzzy.py index 362d8f341e..c64c515931 100644 --- a/src/textual/fuzzy.py +++ b/src/textual/fuzzy.py @@ -123,7 +123,7 @@ def score(search: _Search) -> float: (2.0 if offset in first_letters else 1.0) for offset in search.offsets ) # Divide by the number of groups - # 1 group no change, 2 groups score is halved etc + # 1 group no change, 2 groups score is halved etc. score /= search.groups return score diff --git a/tests/test_fuzzy.py b/tests/test_fuzzy.py index 36f011d320..dc3c8ccd92 100644 --- a/tests/test_fuzzy.py +++ b/tests/test_fuzzy.py @@ -4,28 +4,20 @@ from textual.fuzzy import Matcher -def test_match(): - matcher = Matcher("foo.bar") - - # No match - assert matcher.match("egg") == 0 - assert matcher.match("") == 0 - - # Perfect match - assert matcher.match("foo.bar") == 1.0 - # Perfect match (with superfluous characters) - assert matcher.match("foo.bar sdf") == 1.0 - assert matcher.match("xz foo.bar sdf") == 1.0 +def test_no_match(): + """Check non matching score of zero.""" + matcher = Matcher("x") + assert matcher.match("foo") == 0 - # Partial matches - # 2 Groups - assert matcher.match("foo egg.bar") == 1.0 - 1 / 11 - # 3 Groups - assert matcher.match("foo .ba egg r") == 1.0 - 2 / 13 +def test_match_single_group(): + """Check that single groups rang higher.""" + matcher = Matcher("abc") + assert matcher.match("foo abc bar") > matcher.match("fooa barc") def test_boosted_matches(): + """Check first word matchers rank higher.""" matcher = Matcher("ss") # First word matchers should score higher From ba961c4c9177450c7ffbdb149ef4fcb597397062 Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Tue, 10 Dec 2024 11:35:05 +0000 Subject: [PATCH 10/20] remove superfluous code --- src/textual/fuzzy.py | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/src/textual/fuzzy.py b/src/textual/fuzzy.py index c64c515931..6d5d1e4859 100644 --- a/src/textual/fuzzy.py +++ b/src/textual/fuzzy.py @@ -118,7 +118,7 @@ def score(search: _Search) -> float: """ # This is a heuristic, and can be tweaked for better results - # 2 points for a first letter, 1.0 for other letters + # 2 points for a first letter, 1 for other letters score: float = sum( (2.0 if offset in first_letters else 1.0) for offset in search.offsets ) @@ -211,19 +211,3 @@ def highlight(self, candidate: str) -> Text: for offset in offsets: text.stylize(self._match_style, offset, offset + 1) return text - - -if __name__ == "__main__": - TEST = "Savess Screens shot" - from rich import print - from rich.text import Text - - fuzzy = FuzzySearch() - - for score, offsets in fuzzy._match("ss", TEST): - text = Text(TEST) - for offset in offsets: - text.stylize("reverse", offset, offset + 1) - print("--") - print(score) - print(text) From 52439f5cba999058b660149e141a7388140803be Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Tue, 10 Dec 2024 11:52:06 +0000 Subject: [PATCH 11/20] snapshot --- CHANGELOG.md | 1 + .../test_example_color_command.svg | 112 +++++++++--------- 2 files changed, 57 insertions(+), 56 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 38a6cb0c2b..c1666869d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/). - Change default quit key to `ctrl+q` https://github.com/Textualize/textual/pull/5352 - Changed delete line binding on TextArea to use `ctrl+shift+x` https://github.com/Textualize/textual/pull/5352 - The command palette will now select the top item automatically https://github.com/Textualize/textual/pull/5361 +- Implemented a better matching algorithm for the command palette https://github.com/Textualize/textual/pull/5365 ### Fixed diff --git a/tests/snapshot_tests/__snapshots__/test_snapshots/test_example_color_command.svg b/tests/snapshot_tests/__snapshots__/test_snapshots/test_example_color_command.svg index 75ecf06763..a1e713eb55 100644 --- a/tests/snapshot_tests/__snapshots__/test_snapshots/test_example_color_command.svg +++ b/tests/snapshot_tests/__snapshots__/test_snapshots/test_example_color_command.svg @@ -19,132 +19,132 @@ font-weight: 700; } - .terminal-3987436012-matrix { + .terminal-905867209-matrix { font-family: Fira Code, monospace; font-size: 20px; line-height: 24.4px; font-variant-east-asian: full-width; } - .terminal-3987436012-title { + .terminal-905867209-title { font-size: 18px; font-weight: bold; font-family: arial; } - .terminal-3987436012-r1 { fill: #c5c8c6 } -.terminal-3987436012-r2 { fill: #e0e0e0 } -.terminal-3987436012-r3 { fill: #ffffff } + .terminal-905867209-r1 { fill: #c5c8c6 } +.terminal-905867209-r2 { fill: #e0e0e0 } +.terminal-905867209-r3 { fill: #ffffff } - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - Press ctrl + p and type a color + Press ctrl + p and type a color - - - - ⭘                    Press ctrl + p and type a color                 - - - - -red - - - - - - - - - - - - - - - - - + + + + ⭘                    Press ctrl + p and type a color                 + + + + +ansi_red + + + + + + + + + + + + + + + + + From c55259b0fa06b92160d7e89450c626f8c0afba0b Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Tue, 10 Dec 2024 12:01:45 +0000 Subject: [PATCH 12/20] fix snapshot --- .../test_example_color_command.svg | 112 +++++++++--------- tests/snapshot_tests/test_snapshots.py | 2 +- 2 files changed, 57 insertions(+), 57 deletions(-) diff --git a/tests/snapshot_tests/__snapshots__/test_snapshots/test_example_color_command.svg b/tests/snapshot_tests/__snapshots__/test_snapshots/test_example_color_command.svg index a1e713eb55..75ecf06763 100644 --- a/tests/snapshot_tests/__snapshots__/test_snapshots/test_example_color_command.svg +++ b/tests/snapshot_tests/__snapshots__/test_snapshots/test_example_color_command.svg @@ -19,132 +19,132 @@ font-weight: 700; } - .terminal-905867209-matrix { + .terminal-3987436012-matrix { font-family: Fira Code, monospace; font-size: 20px; line-height: 24.4px; font-variant-east-asian: full-width; } - .terminal-905867209-title { + .terminal-3987436012-title { font-size: 18px; font-weight: bold; font-family: arial; } - .terminal-905867209-r1 { fill: #c5c8c6 } -.terminal-905867209-r2 { fill: #e0e0e0 } -.terminal-905867209-r3 { fill: #ffffff } + .terminal-3987436012-r1 { fill: #c5c8c6 } +.terminal-3987436012-r2 { fill: #e0e0e0 } +.terminal-3987436012-r3 { fill: #ffffff } - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - Press ctrl + p and type a color + Press ctrl + p and type a color - - - - ⭘                    Press ctrl + p and type a color                 - - - - -ansi_red - - - - - - - - - - - - - - - - - + + + + ⭘                    Press ctrl + p and type a color                 + + + + +red + + + + + + + + + + + + + + + + + diff --git a/tests/snapshot_tests/test_snapshots.py b/tests/snapshot_tests/test_snapshots.py index a6723b8be2..c3c9791068 100644 --- a/tests/snapshot_tests/test_snapshots.py +++ b/tests/snapshot_tests/test_snapshots.py @@ -1510,7 +1510,7 @@ def test_example_color_command(snap_compare): """Test the color_command example.""" assert snap_compare( EXAMPLES_DIR / "color_command.py", - press=[App.COMMAND_PALETTE_BINDING, "r", "e", "d", "down", "enter"], + press=[App.COMMAND_PALETTE_BINDING, "r", "e", "d", "enter"], ) From cc5fa9e647f6009d9c177173a879508958a23a85 Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Tue, 10 Dec 2024 14:52:57 +0000 Subject: [PATCH 13/20] tweak to heuristic --- src/textual/fuzzy.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/textual/fuzzy.py b/src/textual/fuzzy.py index 6d5d1e4859..051431a33d 100644 --- a/src/textual/fuzzy.py +++ b/src/textual/fuzzy.py @@ -118,13 +118,13 @@ def score(search: _Search) -> float: """ # This is a heuristic, and can be tweaked for better results - # 2 points for a first letter, 1 for other letters + # Boost first letter matches score: float = sum( (2.0 if offset in first_letters else 1.0) for offset in search.offsets ) - # Divide by the number of groups - # 1 group no change, 2 groups score is halved etc. - score /= search.groups + # A single group gets a boost, as the user may be typing out an entire word + if search.groups == 1: + score *= 1.5 return score stack: list[_Search] = [_Search()] From 924765feff8298fd511a76781e2fc5644bc89d5b Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Tue, 10 Dec 2024 15:41:39 +0000 Subject: [PATCH 14/20] use max over sort --- src/textual/fuzzy.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/textual/fuzzy.py b/src/textual/fuzzy.py index 051431a33d..5ad0bb6197 100644 --- a/src/textual/fuzzy.py +++ b/src/textual/fuzzy.py @@ -78,13 +78,9 @@ def match(self, query: str, candidate: str) -> tuple[float, tuple[int, ...]]: cache_key = (query, candidate, self.case_sensitive) if cache_key in self.cache: return self.cache[cache_key] - - matches = sorted(self._match(query, candidate), key=itemgetter(0)) - result: tuple[float, tuple[int, ...]] - if not matches: - result = (0.0, ()) - else: - result = matches[-1] + result = max( + self._match(query, candidate), key=itemgetter(0), default=(0.0, ()) + ) self.cache[cache_key] = result return result From 4a038e3129121e4c6a71cc2ebda51a3b9246635d Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Tue, 10 Dec 2024 16:03:32 +0000 Subject: [PATCH 15/20] wrong words --- src/textual/containers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/textual/containers.py b/src/textual/containers.py index 18a46d1455..a6f655a177 100644 --- a/src/textual/containers.py +++ b/src/textual/containers.py @@ -267,7 +267,7 @@ def __init__( stretch_height: bool = True, regular: bool = False, ) -> None: - """Initialize a Widget. + """ Args: *children: Child widgets. From a1bb0561f16c41de33f60447f8726ceae425f1ee Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Wed, 11 Dec 2024 11:13:17 +0000 Subject: [PATCH 16/20] limit loops --- src/textual/fuzzy.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/textual/fuzzy.py b/src/textual/fuzzy.py index 5ad0bb6197..df7413af5a 100644 --- a/src/textual/fuzzy.py +++ b/src/textual/fuzzy.py @@ -128,8 +128,11 @@ def score(search: _Search) -> float: pop = stack.pop query_size = len(query) find = candidate.find + # Limit the number of loops out of an abundance of caution. + # This would be hard to reach without contrived data. + remaining_loops = 50 - while stack: + while stack and (remaining_loops := remaining_loops - 1): search = pop() offset = find(query[search.query_offset], search.candidate_offset) if offset != -1: @@ -205,5 +208,6 @@ def highlight(self, candidate: str) -> Text: if not score: return text for offset in offsets: - text.stylize(self._match_style, offset, offset + 1) + if not candidate[offset].isspace(): + text.stylize(self._match_style, offset, offset + 1) return text From cc0615b74c3683b17d68ea158c59a3e8e5f59e99 Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Wed, 11 Dec 2024 11:28:15 +0000 Subject: [PATCH 17/20] heuristic --- src/textual/fuzzy.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/textual/fuzzy.py b/src/textual/fuzzy.py index df7413af5a..011ad0717d 100644 --- a/src/textual/fuzzy.py +++ b/src/textual/fuzzy.py @@ -118,9 +118,10 @@ def score(search: _Search) -> float: score: float = sum( (2.0 if offset in first_letters else 1.0) for offset in search.offsets ) - # A single group gets a boost, as the user may be typing out an entire word - if search.groups == 1: - score *= 1.5 + # Boost to favor less groups + offset_count = len(search.offsets) + normalized_groups = (offset_count - (search.groups - 1)) / offset_count + score *= 1 + (normalized_groups**2) return score stack: list[_Search] = [_Search()] From dbadf6b6b5c334e7e313bd03bc0fec46c78544b6 Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Wed, 11 Dec 2024 11:29:40 +0000 Subject: [PATCH 18/20] docstring --- src/textual/fuzzy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/textual/fuzzy.py b/src/textual/fuzzy.py index 011ad0717d..acddf0ae09 100644 --- a/src/textual/fuzzy.py +++ b/src/textual/fuzzy.py @@ -57,7 +57,7 @@ class FuzzySearch: """ def __init__(self, case_sensitive: bool = False) -> None: - """_summary_ + """Initialize fuzzy search. Args: case_sensitive: Is the match case sensitive? From 4b105913d5e0b8b57a3b3c07668008a55ba3665c Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Wed, 11 Dec 2024 11:44:45 +0000 Subject: [PATCH 19/20] bump max loops --- src/textual/fuzzy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/textual/fuzzy.py b/src/textual/fuzzy.py index acddf0ae09..5e683bd78a 100644 --- a/src/textual/fuzzy.py +++ b/src/textual/fuzzy.py @@ -131,7 +131,7 @@ def score(search: _Search) -> float: find = candidate.find # Limit the number of loops out of an abundance of caution. # This would be hard to reach without contrived data. - remaining_loops = 50 + remaining_loops = 500 while stack and (remaining_loops := remaining_loops - 1): search = pop() From ea2a731aab63ef17e4fc540dfdc5cfd207ad7c65 Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Wed, 11 Dec 2024 11:57:40 +0000 Subject: [PATCH 20/20] bail early --- src/textual/fuzzy.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/textual/fuzzy.py b/src/textual/fuzzy.py index 5e683bd78a..337ad29b4d 100644 --- a/src/textual/fuzzy.py +++ b/src/textual/fuzzy.py @@ -8,7 +8,7 @@ from __future__ import annotations from operator import itemgetter -from re import finditer +from re import IGNORECASE, escape, finditer, search from typing import Iterable, NamedTuple import rich.repr @@ -75,6 +75,14 @@ def match(self, query: str, candidate: str) -> tuple[float, tuple[int, ...]]: Returns: A pair of (score, tuple of offsets). `(0, ())` for no result. """ + + query_regex = ".*?".join(f"({escape(character)})" for character in query) + if not search( + query_regex, candidate, flags=0 if self.case_sensitive else IGNORECASE + ): + # Bail out early if there is no possibility of a match + return (0.0, ()) + cache_key = (query, candidate, self.case_sensitive) if cache_key in self.cache: return self.cache[cache_key] @@ -131,7 +139,7 @@ def score(search: _Search) -> float: find = candidate.find # Limit the number of loops out of an abundance of caution. # This would be hard to reach without contrived data. - remaining_loops = 500 + remaining_loops = 200 while stack and (remaining_loops := remaining_loops - 1): search = pop()