Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

boost for first letter matches #5365

Merged
merged 20 commits into from
Dec 11, 2024
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
- Change default quit key to `ctrl+q` https://github.com/Textualize/textual/pull/5352
- Changed delete line binding on TextArea to use `ctrl+shift+x` https://github.com/Textualize/textual/pull/5352
- The command palette will now select the top item automatically https://github.com/Textualize/textual/pull/5361
- Implemented a better matching algorithm for the command palette https://github.com/Textualize/textual/pull/5365

### Fixed

Expand Down
2 changes: 1 addition & 1 deletion src/textual/containers.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ def __init__(
stretch_height: bool = True,
regular: bool = False,
) -> None:
"""Initialize a Widget.
"""

Args:
*children: Child widgets.
Expand Down
195 changes: 148 additions & 47 deletions src/textual/fuzzy.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,151 @@

from __future__ import annotations

from re import IGNORECASE, compile, escape
from operator import itemgetter
from re import IGNORECASE, escape, finditer, search
from typing import Iterable, NamedTuple

import rich.repr
from rich.style import Style
from rich.text import Text

from textual.cache import LRUCache

class _Search(NamedTuple):
    """Internal structure to keep track of a recursive search.

    Each instance is an immutable snapshot of one partial match: where to
    resume scanning in the candidate, which query character is wanted next,
    and the candidate offsets matched so far.
    """

    # Index in the candidate from which the next lookup starts.
    candidate_offset: int = 0
    # Index of the next query character still to be matched.
    query_offset: int = 0
    # Candidate offsets matched so far, in ascending order.
    offsets: tuple[int, ...] = ()

    def branch(self, offset: int) -> tuple[_Search, _Search]:
        """Branch this search when a matching letter is found.

        Args:
            offset: Offset in the candidate of a letter that matches the
                current query character.

        Returns:
            A pair of search objects. The first accepts the match (the query
            position advances and `offset` is recorded); the second skips it
            and keeps looking for the same query character further along.
            Both resume scanning the candidate just after `offset`.
        """
        _, query_offset, offsets = self
        resume_at = offset + 1
        return (
            _Search(resume_at, query_offset + 1, offsets + (offset,)),
            _Search(resume_at, query_offset, offsets),
        )

    @property
    def groups(self) -> int:
        """Number of groups (runs of consecutive offsets) in offsets.

        Returns 0 when nothing has been matched yet.
        """
        offsets = self.offsets
        if not offsets:
            # A fresh search has no offsets; indexing offsets[0] would raise.
            return 0
        # Every break in consecutiveness starts a new group.
        breaks = sum(
            1
            for previous, current in zip(offsets, offsets[1:])
            if current != previous + 1
        )
        return 1 + breaks


class FuzzySearch:
    """Performs a fuzzy search.

    Unlike a regex solution, this will find all possible matches (up to an
    internal iteration limit) and report the best scoring one.
    """

    def __init__(self, case_sensitive: bool = False) -> None:
        """Initialize fuzzy search.

        Args:
            case_sensitive: Is the match case sensitive?
        """
        # Successful results keyed by (query, candidate, case_sensitive).
        self.cache: dict[tuple[str, str, bool], tuple[float, tuple[int, ...]]] = {}
        self.case_sensitive = case_sensitive

    def match(self, query: str, candidate: str) -> tuple[float, tuple[int, ...]]:
        """Match against a query.

        Args:
            query: The fuzzy query.
            candidate: A candidate to check.

        Returns:
            A pair of (score, tuple of offsets). `(0.0, ())` for no result,
            which includes the case of an empty query.
        """
        if not query:
            # An empty regex matches anything, so the bail-out below would not
            # stop an empty query, and the search loop would then index into
            # an empty string.
            return (0.0, ())

        # Consult the cache before doing any regex work.
        cache_key = (query, candidate, self.case_sensitive)
        cached = self.cache.get(cache_key)
        if cached is not None:
            return cached

        query_regex = ".*?".join(f"({escape(character)})" for character in query)
        if not search(
            query_regex, candidate, flags=0 if self.case_sensitive else IGNORECASE
        ):
            # Bail out early if there is no possibility of a match
            return (0.0, ())

        result = max(
            self._match(query, candidate), key=itemgetter(0), default=(0.0, ())
        )
        self.cache[cache_key] = result
        return result

    def _match(
        self, query: str, candidate: str
    ) -> Iterable[tuple[float, tuple[int, ...]]]:
        """Generator to do the matching.

        Args:
            query: Query to match.
            candidate: Candidate to check against.

        Yields:
            Pairs of score and tuple of offsets. Nothing is yielded for an
            empty query.
        """
        if not query:
            return

        if not self.case_sensitive:
            query = query.lower()
            candidate = candidate.lower()

        # We need this to give a bonus to first letters.
        first_letters = {match.start() for match in finditer(r"\w+", candidate)}

        def score(found: _Search) -> float:
            """Score a search.

            Args:
                found: Search object with a complete set of offsets.

            Returns:
                Score.
            """
            # This is a heuristic, and can be tweaked for better results
            # Boost first letter matches
            total: float = sum(
                (2.0 if offset in first_letters else 1.0) for offset in found.offsets
            )
            # Boost to favor fewer groups
            offset_count = len(found.offsets)
            normalized_groups = (offset_count - (found.groups - 1)) / offset_count
            total *= 1 + (normalized_groups**2)
            return total

        stack: list[_Search] = [_Search()]
        push = stack.append
        pop = stack.pop
        query_size = len(query)
        find = candidate.find
        # Limit the number of loops out of an abundance of caution.
        # This would be hard to reach without contrived data.
        remaining_loops = 200

        while stack and (remaining_loops := remaining_loops - 1):
            state = pop()
            offset = find(query[state.query_offset], state.candidate_offset)
            if offset != -1:
                advance_branch, skip_branch = state.branch(offset)
                if advance_branch.query_offset == query_size:
                    # Every query character has been placed: a full match.
                    yield score(advance_branch), advance_branch.offsets
                else:
                    push(advance_branch)
                # Always explore later occurrences of the same character too.
                push(skip_branch)


@rich.repr.auto
Expand All @@ -36,11 +174,8 @@ def __init__(
"""
self._query = query
self._match_style = Style(reverse=True) if match_style is None else match_style
self._query_regex = compile(
".*?".join(f"({escape(character)})" for character in query),
flags=0 if case_sensitive else IGNORECASE,
)
self._cache: LRUCache[str, float] = LRUCache(1024 * 4)
self._case_sensitive = case_sensitive
self.fuzzy_search = FuzzySearch()

@property
def query(self) -> str:
Expand All @@ -52,15 +187,10 @@ def match_style(self) -> Style:
"""The style that will be used to highlight hits in the matched text."""
return self._match_style

@property
def query_pattern(self) -> str:
"""The regular expression pattern built from the query."""
return self._query_regex.pattern

@property
def case_sensitive(self) -> bool:
"""Is this matcher case sensitive?"""
return not bool(self._query_regex.flags & IGNORECASE)
return self._case_sensitive

def match(self, candidate: str) -> float:
"""Match the candidate against the query.
Expand All @@ -71,27 +201,7 @@ def match(self, candidate: str) -> float:
Returns:
Strength of the match from 0 to 1.
"""
cached = self._cache.get(candidate)
if cached is not None:
return cached
match = self._query_regex.search(candidate)
if match is None:
score = 0.0
else:
assert match.lastindex is not None
offsets = [
match.span(group_no)[0] for group_no in range(1, match.lastindex + 1)
]
group_count = 0
last_offset = -2
for offset in offsets:
if offset > last_offset + 1:
group_count += 1
last_offset = offset

score = 1.0 - ((group_count - 1) / len(candidate))
self._cache[candidate] = score
return score
return self.fuzzy_search.match(self.query, candidate)[0]

def highlight(self, candidate: str) -> Text:
"""Highlight the candidate with the fuzzy match.
Expand All @@ -102,20 +212,11 @@ def highlight(self, candidate: str) -> Text:
Returns:
A [rich.text.Text][`Text`] object with highlighted matches.
"""
match = self._query_regex.search(candidate)
text = Text.from_markup(candidate)
if match is None:
score, offsets = self.fuzzy_search.match(self.query, candidate)
if not score:
return text
assert match.lastindex is not None
if self._query in text.plain:
# Favor complete matches
offset = text.plain.index(self._query)
text.stylize(self._match_style, offset, offset + len(self._query))
else:
offsets = [
match.span(group_no)[0] for group_no in range(1, match.lastindex + 1)
]
for offset in offsets:
for offset in offsets:
if not candidate[offset].isspace():
text.stylize(self._match_style, offset, offset + 1)

return text
2 changes: 1 addition & 1 deletion tests/snapshot_tests/test_snapshots.py
Original file line number Diff line number Diff line change
Expand Up @@ -1510,7 +1510,7 @@ def test_example_color_command(snap_compare):
"""Test the color_command example."""
assert snap_compare(
EXAMPLES_DIR / "color_command.py",
press=[App.COMMAND_PALETTE_BINDING, "r", "e", "d", "down", "enter"],
press=[App.COMMAND_PALETTE_BINDING, "r", "e", "d", "enter"],
)


Expand Down
29 changes: 14 additions & 15 deletions tests/test_fuzzy.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,24 @@
from textual.fuzzy import Matcher


def test_match():
matcher = Matcher("foo.bar")
def test_no_match():
"""Check non matching score of zero."""
matcher = Matcher("x")
assert matcher.match("foo") == 0


# No match
assert matcher.match("egg") == 0
assert matcher.match("") == 0
def test_match_single_group():
"""Check that single groups rang higher."""
matcher = Matcher("abc")
assert matcher.match("foo abc bar") > matcher.match("fooa barc")

# Perfect match
assert matcher.match("foo.bar") == 1.0
# Perfect match (with superfluous characters)
assert matcher.match("foo.bar sdf") == 1.0
assert matcher.match("xz foo.bar sdf") == 1.0

# Partial matches
# 2 Groups
assert matcher.match("foo egg.bar") == 1.0 - 1 / 11
def test_boosted_matches():
"""Check first word matchers rank higher."""
matcher = Matcher("ss")

# 3 Groups
assert matcher.match("foo .ba egg r") == 1.0 - 2 / 13
# First word matchers should score higher
assert matcher.match("Save Screenshot") > matcher.match("Show Keys abcde")


def test_highlight():
Expand Down
Loading