From 3cef5effe9a3cf6ad9e173f19f3067d4ff21a3ba Mon Sep 17 00:00:00 2001 From: Piotr Surowiec Date: Fri, 8 Apr 2022 15:36:59 +0200 Subject: [PATCH] fix: do not decorate `ELLIPSIS` with `<b>` tags (#123) * fix: do not decorate `ELLIPSIS` with `<b>` tags Multiple matches are merged with `ELLIPSIS` as a separator. This reverses the order of operations so that the matches are decorated before they are merged. This way, the content of `ELLIPSIS` is not decorated with HTML `<b>` tags. * fix: use correct module for Iterable Collections Abstract Base Classes are deprecated in `collections` since Python 3.3. They will be moved to `collections.abc` after Python 3.8. https://docs.python.org/3.8/library/collections.html --- edxsearch/__init__.py | 2 +- search/result_processor.py | 8 ++++---- search/tests/test_search_result_processor.py | 19 ++++++++++++++++++- search/utils.py | 4 ++-- 4 files changed, 25 insertions(+), 8 deletions(-) diff --git a/edxsearch/__init__.py b/edxsearch/__init__.py index e546d286..0f5cc993 100644 --- a/edxsearch/__init__.py +++ b/edxsearch/__init__.py @@ -1,3 +1,3 @@ """ Container module for testing / demoing search """ -__version__ = '3.2.0' +__version__ = '3.3.0' diff --git a/search/result_processor.py b/search/result_processor.py index ab800fe7..cc5961ed 100644 --- a/search/result_processor.py +++ b/search/result_processor.py @@ -142,9 +142,9 @@ def excerpt(self): match_phrases, DESIRED_EXCERPT_LENGTH ) - excerpt_text = ELLIPSIS.join(matches) - for match_word in match_phrases: - excerpt_text = SearchResultProcessor.decorate_matches(excerpt_text, match_word) + for i, _ in enumerate(matches): + for match_word in match_phrases: + matches[i] = SearchResultProcessor.decorate_matches(matches[i], match_word) - return excerpt_text + return ELLIPSIS.join(matches) diff --git a/search/tests/test_search_result_processor.py b/search/tests/test_search_result_processor.py index cb3994fb..3dbcce65 100644 --- a/search/tests/test_search_result_processor.py +++ 
b/search/tests/test_search_result_processor.py @@ -4,7 +4,7 @@ from django.test import TestCase from django.test.utils import override_settings -from search.result_processor import SearchResultProcessor +from search.result_processor import SearchResultProcessor, ELLIPSIS # Any class that inherits from TestCase will cause too-many-public-methods pylint error @@ -250,6 +250,23 @@ def test_excerpt_quoted(self, search_phrase, expected_excerpt): srp = SearchResultProcessor(test_result, search_phrase) self.assertIn(expected_excerpt, srp.excerpt) + def test_excerpt_ellipsis_undecorated(self): + """ + Multiple matches are joined with `ELLIPSIS` as a separator. + This verifies that the `ELLIPSIS` is not decorated with the HTML `<b>` tag when it matches the search phrase. + + E.g. when `a` was in the search phrases before, it was resulting in text like `<sp<b>a</b>n`, which is not valid + HTML. + """ + test_result = { + "content": { + "a": "Just a line of text.", + "b": "Just a line of different text.", + } + } + srp = SearchResultProcessor(test_result, 'Just a line') + self.assertIn(ELLIPSIS, srp.excerpt) + class TestSearchResultProcessor(SearchResultProcessor): """ diff --git a/search/utils.py b/search/utils.py index dfba836f..a5b4a424 100644 --- a/search/utils.py +++ b/search/utils.py @@ -1,7 +1,7 @@ """ Utility classes to support others """ import importlib -import collections +from collections.abc import Iterable def _load_class(class_path, default): @@ -21,7 +21,7 @@ def _load_class(class_path, default): def _is_iterable(item): """ Checks if an item is iterable (list, tuple, generator), but not string """ - return isinstance(item, collections.Iterable) and not isinstance(item, str) + return isinstance(item, Iterable) and not isinstance(item, str) class ValueRange: