From b904ecf81c8175a7b3063c727ee953f73d632596 Mon Sep 17 00:00:00 2001 From: Kevin McElwee Date: Wed, 2 Mar 2022 13:03:30 -0500 Subject: [PATCH 01/15] Add views to docs --- sphinx-docs/codedocs.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sphinx-docs/codedocs.rst b/sphinx-docs/codedocs.rst index bcf18e0..6faf19f 100644 --- a/sphinx-docs/codedocs.rst +++ b/sphinx-docs/codedocs.rst @@ -84,6 +84,11 @@ Signals .. automodule:: parasolr.django.signals :members: +Views +^^^^^ + +.. automodule:: parasolr.django.views + :members: Manage Commands --------------- From 8c5fbb13fabf632515f78cb48b78883a80c35fa2 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 7 Feb 2022 13:17:58 -0500 Subject: [PATCH 02/15] Set version to 0.8.2 --- CHANGELOG.rst | 4 ++-- parasolr/__init__.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index a8440f8..831e23d 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -3,8 +3,8 @@ CHANGELOG ========= -0.9 ---- +0.8.2 +----- * When subclassing ``SolrQuerySet``, result documents can now be customized by extending ``get_result_document`` diff --git a/parasolr/__init__.py b/parasolr/__init__.py index 4f560b4..9528ed6 100644 --- a/parasolr/__init__.py +++ b/parasolr/__init__.py @@ -1,6 +1,6 @@ default_app_config = "parasolr.apps.ParasolConfig" -__version_info__ = (0, 9, 0, "dev") +__version_info__ = (0, 8, 2, None) # Dot-connect all but the last. Last is dash-connected if not None. 
__version__ = ".".join([str(i) for i in __version_info__[:-1]]) From 2f3ff570c50ebce5036601872810fa66b3129b8b Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 15 Dec 2022 14:40:19 -0500 Subject: [PATCH 03/15] Add basic support for response for Solr grouped queries --- parasolr/solr/client.py | 111 +++++++++++++++++++++++------ parasolr/solr/tests/test_client.py | 63 +++++++++++++++- 2 files changed, 153 insertions(+), 21 deletions(-) diff --git a/parasolr/solr/client.py b/parasolr/solr/client.py index a9befc0..ea20a42 100644 --- a/parasolr/solr/client.py +++ b/parasolr/solr/client.py @@ -1,6 +1,6 @@ import logging from collections import OrderedDict -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional import requests from attrdict import AttrDict @@ -39,33 +39,43 @@ def __repr__(self): return "ParasolrDict(%s)" % super(AttrDict, self).__repr__() -class QueryResponse: - """Thin wrapper to give access to Solr select responses. - - Args: - response: A Solr query response - """ +class BaseResponse: + """Base Solr response class with fields common to standard and + grouped results.""" def __init__(self, response: Dict) -> None: - # cast to ParasolrDict for any dict-like object - response = ParasolrDict(response) - self.numFound = int(response.response.numFound) - self.start = int(response.response.start) - self.docs = response.response.docs - self.params = response.responseHeader.params - self.stats = response.stats if "stats" in response else {} + # cast to ParasolrDict for any dict-like object and store + self.response = ParasolrDict(response) + # facet counts need to be processed to convert into + # ordered dict, so process and store self.facet_counts = {} - if "docs" in response.response: - self.docs = response.response.docs - if "facet_counts" in response: + if "facet_counts" in self.response: self.facet_counts = self._process_facet_counts(response.facet_counts) - self.highlighting = response.get("highlighting", {}) - 
self.expanded = response.get("expanded", {}) # NOTE: To access facet_counts.facet_fields or facet_counts.facet_ranges # as OrderedDicts, you must use dict notation (or AttrDict *will* # convert). + @property + def params(self): + "parameters sent to solr in the request, as returned in response header" + return self.response.responseHeader.params + + @property + def stats(self): + "stats portion of the response, if statics were requested" + return self.response.get("stats", {}) + + @property + def highlighting(self): + "highlighting portion of the response, if highlighting was requested" + return self.response.get("highlighting", {}) + + @property + def expanded(self): + "expanded portion of the response, if collapse/expanded results enabled" + return self.response.get("expanded", {}) + def _process_facet_counts(self, facet_counts: AttrDict) -> OrderedDict: """Convert facet_fields and facet_ranges to OrderedDict. @@ -86,6 +96,58 @@ def _process_facet_counts(self, facet_counts: AttrDict) -> OrderedDict: return facet_counts +class QueryResponse(BaseResponse): + """Thin wrapper to give access to Solr select responses. + + Args: + response: A Solr query response + """ + + def __init__(self, response: Dict) -> None: + super().__init__(response) + # document list is contained with the "response" element + # in the json returned by solr + self.document_list = self.response.response + + @property + def numFound(self) -> int: + return self.document_list.numFound + + @property + def start(self) -> int: + return self.document_list.start + + @property + def docs(self) -> List: + return self.document_list.docs + + +class GroupedResponse(BaseResponse): + """Query response variant for grouped results. 
+ + Args: + response: A Solr query response + """ + + def __init__(self, response: Dict) -> None: + super().__init__(response) + # grouped response structure is structured as a dict + # first keyed on fieldname with number of matches, then a dict + # of group values and corresponding document list + self.grouped = self.response.grouped + + # access grouped results at: + # self.grouped.fieldname.groups + # groups has + # will be a dict of field value, doclist + + @property + def numFound(self) -> int: + # each field used for grouping has a total + # for the number of matches in that grouping + return sum(group["matches"] for group in self.grouped.values()) + + class SolrClient(ClientBase): """Class to aggregate all of the other Solr APIs and settings. @@ -155,4 +217,13 @@ def query(self, wrap: bool = True, **kwargs: Any) -> Optional[QueryResponse]: response = self.make_request("post", url, headers=headers, params=kwargs) if response: # queries return the search response for now - return QueryResponse(response) if wrap else response + + # unnless a raw/unwrapped result is requested, + # determine result type to use and initialize + if wrap: + result_class = QueryResponse + if "grouped" in response: + result_class = GroupedResponse + response = result_class(response) + + return response diff --git a/parasolr/solr/tests/test_client.py b/parasolr/solr/tests/test_client.py index 88817b5..9e8fa13 100644 --- a/parasolr/solr/tests/test_client.py +++ b/parasolr/solr/tests/test_client.py @@ -6,7 +6,12 @@ from parasolr import __version__ as parasolr_ver from parasolr.solr.admin import CoreAdmin -from parasolr.solr.client import ParasolrDict, QueryResponse, SolrClient +from parasolr.solr.client import ( + GroupedResponse, + ParasolrDict, + QueryResponse, + SolrClient, +) from parasolr.solr.schema import Schema from parasolr.solr.update import Update @@ -169,3 +174,59 @@ def test_query(self, test_client): # test wrap = False response = test_client.query(q="*:*", wrap=False) 
assert not isinstance(response, QueryResponse) + + # test grouped response return type + response = test_client.query(q="*:*", group="true", **{"group.field": "A"}) + assert isinstance(response, GroupedResponse) + + +class TestGroupedResponse: + def test_init(self): + + response = AttrDict( + { + "responseHeader": {"params": {"group": "true", "group.field": "A"}}, + "grouped": { + "A": { + "matches": 4, + "groups": [ + { + "groupValue": "foo", + "doclist": { + "numFound": 1, + "start": 0, + "docs": [{"A": "foo", "B": 5, "id": "1"}], + }, + } + ], + } + }, + "facet_counts": { + "facet_fields": {"A": ["5", 1, "2", 1, "3", 1]}, + "facet_ranges": {"A": {"counts": ["1", 1, "2", 2, "7", 1]}}, + }, + "stats": { + "stats_fields": { + "account_start_i": { + "min": 1919.0, + "max": 2018.0, + } + } + }, + } + ) + gr = GroupedResponse(response) + # total number found across groups + assert gr.numFound == response.grouped.A.matches + # can access group information + assert gr.grouped.A.groups[0].groupValue == "foo" + assert gr.grouped.A.groups[0].doclist.numFound == 1 + + # inherited from base response class + assert gr.params == response.responseHeader.params + assert gr.stats == response.stats + assert isinstance(gr.facet_counts["facet_fields"]["A"], OrderedDict) + assert isinstance(gr.facet_counts["facet_ranges"]["A"]["counts"], OrderedDict) + assert gr.facet_counts["facet_fields"]["A"]["5"] == 1 + assert gr.facet_counts["facet_ranges"]["A"]["counts"]["2"] == 2 + assert gr.highlighting == {} From b27cb8ac82f1b09bb61c67e8187bed98e79e90c8 Mon Sep 17 00:00:00 2001 From: Ryan Heuser Date: Thu, 15 Dec 2022 16:17:56 -0500 Subject: [PATCH 04/15] reorganizing how groupresponse returns data --- parasolr/query/queryset.py | 50 ++++++++++++++++++++++++++++---------- parasolr/solr/client.py | 19 ++++++++++++++- 2 files changed, 55 insertions(+), 14 deletions(-) diff --git a/parasolr/query/queryset.py b/parasolr/query/queryset.py index 1507c91..73ff1d2 100644 --- 
a/parasolr/query/queryset.py +++ b/parasolr/query/queryset.py @@ -30,6 +30,7 @@ class SolrQuerySet: """ _result_cache = None + _result_cache_d = {} start = 0 stop = None sort_options = [] @@ -59,7 +60,10 @@ def __init__(self, solr: SolrClient): # convert search operator into form needed for combining queries self._search_op = " %s " % self.default_search_operator - def get_results(self, **kwargs) -> List[dict]: + + + + def get_response(self, **kwargs) -> List[dict]: """ Query Solr and get the results for the current query and filter options. Populates result cache and returns the documents portion @@ -72,17 +76,39 @@ def get_results(self, **kwargs) -> List[dict]: # if query options have changed? # For now, always query. - query_opts = self.query_opts() - query_opts.update(**kwargs) - # TODO: what do we do about the fact that Solr defaults - # to 10 rows? + if not self._result_cache: + + query_opts = self.query_opts() + query_opts.update(**kwargs) + + # TODO: what do we do about the fact that Solr defaults + # to 10 rows? - # NOTE: django templates choke on AttrDict because it is - # callable; using dictionary response instead - self._result_cache = self.solr.query(**query_opts) + # NOTE: django templates choke on AttrDict because it is + # callable; using dictionary response instead + + self._result_cache = self.solr.query(**query_opts) + + return self._result_cache + + + def get_results(self, **kwargs) -> List[dict]: + """ + Query Solr and get the results for the current query and filter + options. Populates result cache and returns the documents portion + of the reponse. + + Returns: + Solr response documents as a list of dictionaries. 
+ """ + # get query response + response = self.get_response(**kwargs) # if there is a query error, result will not be set - if self._result_cache: - return [self.get_result_document(doc) for doc in self._result_cache.docs] + if response: + return [ + self.get_result_document(doc) + for doc in self._result_cache.docs + ] return [] def get_result_document(self, doc): @@ -538,9 +564,7 @@ def raw_query_parameters(self, **kwargs) -> "SolrQuerySet": def get_highlighting(self) -> Dict[str, Dict[str, List]]: """Return the highlighting portion of the Solr response.""" - if not self._result_cache: - self.get_results() - return self._result_cache.highlighting + return self.get_response().highlighting def all(self) -> "SolrQuerySet": """Return a new queryset that is a copy of the current one.""" diff --git a/parasolr/solr/client.py b/parasolr/solr/client.py index ea20a42..cc00f89 100644 --- a/parasolr/solr/client.py +++ b/parasolr/solr/client.py @@ -141,11 +141,28 @@ def __init__(self, response: Dict) -> None: # groups has # will be a dict of field value, doclist + @property + def group_field(self) -> str: + "group.field as stored in the params. Not yet supporting grouping by query." + return self.params.get('group.field','') + @property def numFound(self) -> int: # each field used for grouping has a total # for the number of matches in that grouping - return sum(group["matches"] for group in self.grouped.values()) + #return sum(group["matches"] for group in self.grouped.values()) + return self.grouped.get(self.group_field,{}).get('matches',0) + + @property + def docs(self) -> List: + """Unlike `QueryResponse.docs`, this returns a list of groups with nested documents. 
+ + :return: _description_ + :rtype: List + """ + return self.grouped.get(self.group_field,{}).get('groups',[]) + + class SolrClient(ClientBase): From 0de49d300a19b4f2622c37e9d7027aacfa1af572 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 15 Dec 2022 16:42:17 -0500 Subject: [PATCH 05/15] Add group method to queryset; update tests for changed get_response --- parasolr/query/aliased_queryset.py | 7 +++ parasolr/query/queryset.py | 65 +++++++++++++++++++-------- parasolr/query/tests/test_queryset.py | 24 +++++----- 3 files changed, 66 insertions(+), 30 deletions(-) diff --git a/parasolr/query/aliased_queryset.py b/parasolr/query/aliased_queryset.py index 9e0394f..96e9700 100644 --- a/parasolr/query/aliased_queryset.py +++ b/parasolr/query/aliased_queryset.py @@ -102,6 +102,13 @@ def highlight(self, field: str, **kwargs) -> "AliasedSolrQuerySet": field = self.field_aliases.get(field, field) return super().highlight(field, **kwargs) + def group(self, field: str, **kwargs) -> "AliasedSolrQuerySet": + """Extend :meth:`parasolr.query.queryset.SolrQuerySet.group` + to support using aliased field names in kwargs.""" + field = self.field_aliases.get(field, field) + # TODO: should we also reverse alias for sort option if specified? 
+ return super().group(field, **kwargs) + def get_facets(self) -> Dict[str, int]: """Extend :meth:`parasolr.query.queryset.SolrQuerySet.get_facets` to use aliased field names for facet and range facet keys.""" diff --git a/parasolr/query/queryset.py b/parasolr/query/queryset.py index 73ff1d2..616b8f9 100644 --- a/parasolr/query/queryset.py +++ b/parasolr/query/queryset.py @@ -38,12 +38,14 @@ class SolrQuerySet: filter_qs = [] field_list = [] highlight_fields = [] + group_field = None facet_field_list = [] stats_field_list = [] range_facet_fields = [] facet_opts = {} stats_opts = {} highlight_opts = {} + group_opts = {} raw_params = {} #: by default, combine search queries with AND @@ -60,9 +62,6 @@ def __init__(self, solr: SolrClient): # convert search operator into form needed for combining queries self._search_op = " %s " % self.default_search_operator - - - def get_response(self, **kwargs) -> List[dict]: """ Query Solr and get the results for the current query and filter @@ -76,21 +75,25 @@ def get_response(self, **kwargs) -> List[dict]: # if query options have changed? # For now, always query. - if not self._result_cache: - - query_opts = self.query_opts() - query_opts.update(**kwargs) + # if cached and no override query args are specified, + # return existing cached result + if self._result_cache and not kwargs: + return self._result_cache + + query_opts = self.query_opts() + query_opts.update(**kwargs) - # TODO: what do we do about the fact that Solr defaults - # to 10 rows? + # TODO: what do we do about the fact that Solr defaults + # to 10 rows? - # NOTE: django templates choke on AttrDict because it is - # callable; using dictionary response instead - - self._result_cache = self.solr.query(**query_opts) + # note that we're caching the result with override options here, + # which may not always be the right thing to do ... 
+ self._result_cache = self.solr.query(**query_opts) - return self._result_cache + # NOTE: django templates choke on AttrDict because it is + # callable; using dictionary response instead + return self._result_cache def get_results(self, **kwargs) -> List[dict]: """ @@ -105,10 +108,7 @@ def get_results(self, **kwargs) -> List[dict]: response = self.get_response(**kwargs) # if there is a query error, result will not be set if response: - return [ - self.get_result_document(doc) - for doc in self._result_cache.docs - ] + return [self.get_result_document(doc) for doc in self._result_cache.docs] return [] def get_result_document(self, doc): @@ -125,6 +125,14 @@ def _set_highlighting_opts(self, query_opts: Dict) -> None: # (prefixes added in highlight methods) query_opts.update(self.highlight_opts) + def _set_group_opts(self, query_opts: Dict) -> None: + """Configure grouping atrtibutes on query_opts. Modifies dictionary + directly.""" + if self.group_field: + query_opts.update({"group": True, "group.field": self.group_field}) + # any other group options can be added as-is + query_opts.update(self.group_opts) + def _set_faceting_opts(self, query_opts: Dict) -> None: """Configure faceting attributes directly on query_opts. Modifies dictionary directly.""" @@ -172,6 +180,9 @@ def query_opts(self) -> Dict[str, str]: # highlighting self._set_highlighting_opts(query_opts) + # grouping + self._set_group_opts(query_opts) + # faceting self._set_faceting_opts(query_opts) @@ -554,6 +565,22 @@ def highlight(self, field: str, **kwargs) -> "SolrQuerySet": return qs_copy + def group(self, field: str, **kwargs) -> "SolrQuerySet": + """ "Configure grouping. Takes arbitrary Solr group + parameters and adds the `group.` prefix to them. 
Example use:: + + queryset.group('group_id', limit=3, sort='order asc') + """ + qs_copy = self._clone() + # store group field and grouping options + # for now, assuming single group field + qs_copy.group_field = field + qs_copy.group_opts.update( + {"group.%s" % (field, opt): value for opt, value in kwargs.items()} + ) + + return qs_copy + def raw_query_parameters(self, **kwargs) -> "SolrQuerySet": """Add abritrary raw parameters to be included in the query request, e.g. for variables referenced in join or field queries. @@ -589,6 +616,7 @@ def _clone(self) -> "SolrQuerySet": qs_copy.start = self.start qs_copy.stop = self.stop qs_copy.highlight_fields = list(self.highlight_fields) + qs_copy.group_field = self.group_field # set copies of list and dict attributes qs_copy.search_qs = list(self.search_qs) @@ -597,6 +625,7 @@ def _clone(self) -> "SolrQuerySet": qs_copy.field_list = list(self.field_list) qs_copy.range_facet_fields = list(self.range_facet_fields) qs_copy.highlight_opts = dict(self.highlight_opts) + qs_copy.group_opts = dict(self.group_opts) qs_copy.raw_params = dict(self.raw_params) qs_copy.facet_field_list = list(self.facet_field_list) qs_copy.facet_opts = dict(self.facet_opts) diff --git a/parasolr/query/tests/test_queryset.py b/parasolr/query/tests/test_queryset.py index 2a7ae49..7814608 100644 --- a/parasolr/query/tests/test_queryset.py +++ b/parasolr/query/tests/test_queryset.py @@ -34,6 +34,8 @@ def test_query_opts(self): "facet", "stats", "stats.field", + "group", + "group.field", ]: assert opt not in query_opts @@ -97,6 +99,16 @@ def test_query_opts(self): assert query_opts["facet.range"] == sqs.range_facet_fields assert range_facet_opt in query_opts + def test_query_opts_group(self): + mocksolr = Mock(spec=SolrClient) + sqs = SolrQuerySet(mocksolr) + sqs.group_field = "group_id" + sqs.group_opts = {"group.limit": 3} + query_opts = sqs.query_opts() + assert query_opts["group"] == True + assert query_opts["group.field"] == "group_id" + assert 
query_opts["group.limit"] == 3 + def test_query(self): mocksolr = Mock(spec=SolrClient) mocksolr.query.return_value.docs = [] @@ -531,18 +543,6 @@ def test_get_highlighting(self): sqs._result_cache = Mock(highlighting=mock_highlights) assert sqs.get_highlighting() == mock_highlights - # should populate cache if empty - sqs._result_cache = None - with patch.object(sqs, "get_results") as mock_get_results: - - def set_result_cache(): - sqs._result_cache = Mock() - - mock_get_results.side_effect = set_result_cache - - sqs.get_highlighting() - mock_get_results.assert_called_with() - def test_all(self): mocksolr = Mock(spec=SolrClient) sqs = SolrQuerySet(mocksolr) From ec72eb3f140ca5ac32b22b5a35686cd0d21f5f23 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 19 Dec 2022 11:40:18 -0500 Subject: [PATCH 06/15] Fix group field syntax and add tests for group & alias group methods --- parasolr/query/queryset.py | 2 +- parasolr/query/tests/test_aliased_queryset.py | 9 +++++++++ parasolr/query/tests/test_queryset.py | 8 ++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/parasolr/query/queryset.py b/parasolr/query/queryset.py index 616b8f9..b927f33 100644 --- a/parasolr/query/queryset.py +++ b/parasolr/query/queryset.py @@ -576,7 +576,7 @@ def group(self, field: str, **kwargs) -> "SolrQuerySet": # for now, assuming single group field qs_copy.group_field = field qs_copy.group_opts.update( - {"group.%s" % (field, opt): value for opt, value in kwargs.items()} + {"group.%s" % opt: value for opt, value in kwargs.items()} ) return qs_copy diff --git a/parasolr/query/tests/test_aliased_queryset.py b/parasolr/query/tests/test_aliased_queryset.py index b30af28..2614edb 100644 --- a/parasolr/query/tests/test_aliased_queryset.py +++ b/parasolr/query/tests/test_aliased_queryset.py @@ -145,6 +145,15 @@ def test_highlight(self, mock_highlight): self.mysqs.highlight("foo_b") mock_highlight.assert_called_with("foo_b") + @patch("parasolr.query.queryset.SolrQuerySet.group") + 
def test_group(self, mock_group): + # args should be unaliased + self.mysqs.group("name") + mock_group.assert_called_with(self.mysqs.field_aliases["name"]) + # unknown should be ignored + self.mysqs.group("foo_b") + mock_group.assert_called_with("foo_b") + @patch("parasolr.query.queryset.SolrQuerySet.get_facets") def test_get_facets(self, mock_get_facets): sample_facet_result = { diff --git a/parasolr/query/tests/test_queryset.py b/parasolr/query/tests/test_queryset.py index 7814608..3fd3040 100644 --- a/parasolr/query/tests/test_queryset.py +++ b/parasolr/query/tests/test_queryset.py @@ -506,6 +506,14 @@ def test_highlight(self): assert sqs.highlight_fields == [] assert sqs.highlight_opts == {} + def test_group(self): + mocksolr = Mock(spec=SolrClient) + sqs = SolrQuerySet(mocksolr) + # field only, defaults + group_qs = sqs.group("content", limit=3) + assert group_qs.group_field == "content" + assert group_qs.group_opts == {"group.limit": 3} + def test_raw_query_parameters(self): mocksolr = Mock(spec=SolrClient) sqs = SolrQuerySet(mocksolr) From de2bd0277759646e4feffd2418a70013e1fc36f3 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 19 Dec 2022 11:45:05 -0500 Subject: [PATCH 07/15] Address review comments --- parasolr/query/queryset.py | 10 +++++++--- parasolr/solr/client.py | 18 ++++++++++++------ 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/parasolr/query/queryset.py b/parasolr/query/queryset.py index b927f33..2a89d83 100644 --- a/parasolr/query/queryset.py +++ b/parasolr/query/queryset.py @@ -30,7 +30,6 @@ class SolrQuerySet: """ _result_cache = None - _result_cache_d = {} start = 0 stop = None sort_options = [] @@ -108,6 +107,9 @@ def get_results(self, **kwargs) -> List[dict]: response = self.get_response(**kwargs) # if there is a query error, result will not be set if response: + # TODO: need to handle result doc tranformatiion on grouped response. 
+ # intentionally applying to .docs instead of .items to trigger + # an error if we try to use on grouped response return [self.get_result_document(doc) for doc in self._result_cache.docs] return [] @@ -567,7 +569,9 @@ def highlight(self, field: str, **kwargs) -> "SolrQuerySet": def group(self, field: str, **kwargs) -> "SolrQuerySet": """ "Configure grouping. Takes arbitrary Solr group - parameters and adds the `group.` prefix to them. Example use:: + parameters and adds the `group.` prefix to them. Example use, + grouping on a `group_id` field, limiting to three results per group, + and sorting group members by an `order` field:: queryset.group('group_id', limit=3, sort='order asc') """ @@ -666,7 +670,7 @@ def __getitem__(self, k): # if the result cache is already populated, # return the requested index or slice if self._result_cache: - return self._result_cache.docs[k] + return self._result_cache.items[k] qs_copy = self._clone() diff --git a/parasolr/solr/client.py b/parasolr/solr/client.py index cc00f89..2c3f0d3 100644 --- a/parasolr/solr/client.py +++ b/parasolr/solr/client.py @@ -121,6 +121,10 @@ def start(self) -> int: def docs(self) -> List: return self.document_list.docs + @property + def items(self) -> List: + return self.docs + class GroupedResponse(BaseResponse): """Query response variant for grouped results. @@ -144,25 +148,27 @@ def __init__(self, response: Dict) -> None: @property def group_field(self) -> str: "group.field as stored in the params. Not yet supporting grouping by query." 
- return self.params.get('group.field','') + return self.params.get("group.field", "") @property def numFound(self) -> int: # each field used for grouping has a total # for the number of matches in that grouping - #return sum(group["matches"] for group in self.grouped.values()) - return self.grouped.get(self.group_field,{}).get('matches',0) + # return sum(group["matches"] for group in self.grouped.values()) + return self.grouped.get(self.group_field, {}).get("matches", 0) @property - def docs(self) -> List: + def groups(self) -> List: """Unlike `QueryResponse.docs`, this returns a list of groups with nested documents. :return: _description_ :rtype: List """ - return self.grouped.get(self.group_field,{}).get('groups',[]) - + return self.grouped.get(self.group_field, {}).get("groups", []) + @property + def items(self) -> List: + return self.groups class SolrClient(ClientBase): From f3b54bbeb98f9851cf1eb8b6f4f5b62015637e90 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 19 Dec 2022 12:54:24 -0500 Subject: [PATCH 08/15] Add new queryset group method to pytest plugin --- parasolr/pytest_plugin.py | 1 + 1 file changed, 1 insertion(+) diff --git a/parasolr/pytest_plugin.py b/parasolr/pytest_plugin.py index c660c83..b40a7d4 100644 --- a/parasolr/pytest_plugin.py +++ b/parasolr/pytest_plugin.py @@ -145,6 +145,7 @@ def get_mock_solr_queryset(spec=SolrQuerySet, extra_methods=[]): "query", "only", "also", + "group", "highlight", "raw_query_parameters", "all", From 14d79ba244c3b6dcb025920f98d8624fc8f7d5e1 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Tue, 14 Feb 2023 15:19:12 -0500 Subject: [PATCH 09/15] Add a new indexable classmethod for prefetching on chunks of index items --- CHANGELOG.rst | 6 ++++++ parasolr/indexing.py | 10 ++++++++++ parasolr/management/commands/index.py | 8 +++++--- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 831e23d..88bc662 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -3,6 +3,12 @@ 
CHANGELOG ========= +0.9 +--- + +* New class method `prep_index_chunk` on ``Indexable`` class, to support + prefetching related objects when iterating over Django querysets + 0.8.2 ----- diff --git a/parasolr/indexing.py b/parasolr/indexing.py index 5e3b95b..3c7e1ef 100644 --- a/parasolr/indexing.py +++ b/parasolr/indexing.py @@ -120,6 +120,15 @@ def total_to_index(cls): except AttributeError: raise NotImplementedError + @classmethod + def prep_index_chunk(cls, chunk): + """Optional method for any additional processing on chunks + of items being indexed. Intended to allow adding prefetching on + a chunk when iterating on Django QuerySets; since indexing uses Iterator, + prefetching configured in `items_to_index` is ignored.""" + # default behavior is to do nothing; return chunk unchanged + return chunk + def index_id(self): """Solr identifier. By default, combines :meth:`index item_type` and :attr:`id` with :attr:ID_SEPARATOR`.""" @@ -168,6 +177,7 @@ def index_items(cls, items, progbar=None): count = 0 while chunk: # call index data method if present; otherwise assume item is dict + chunk = cls.prep_index_chunk(chunk) cls.solr.update.index( [i.index_data() if hasattr(i, "index_data") else i for i in chunk] ) diff --git a/parasolr/management/commands/index.py b/parasolr/management/commands/index.py index 8ecdaeb..ea4a1d5 100644 --- a/parasolr/management/commands/index.py +++ b/parasolr/management/commands/index.py @@ -157,7 +157,9 @@ def handle(self, *args, **kwargs): for name, model in self.indexables.items(): if self.options["index"] in [name, "all"]: # index in chunks and update progress bar - count += self.index(model.items_to_index(), progbar=progbar) + # pass in indexable class to ensure we use prefetching + # and chunk size specific to that class + count += self.index(model, model.items_to_index(), progbar=progbar) if progbar: progbar.finish() @@ -170,11 +172,11 @@ def handle(self, *args, **kwargs): # using format for comma-separated numbers 
self.stdout.write("Indexed {:,} item{}".format(count, pluralize(count))) - def index(self, index_data, progbar=None): + def index(self, indexable, index_data, progbar=None): """Index an iterable into the configured solr""" try: # index in chunks and update progress bar if there is one - return Indexable.index_items(index_data, progbar=progbar) + return indexable.index_items(index_data, progbar=progbar) except requests.exceptions.ConnectionError as err: # bail out if we error connecting to Solr raise CommandError(err) From e5790678ee04f079dab216518eece5609010fbb7 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Wed, 15 Feb 2023 09:08:26 -0500 Subject: [PATCH 10/15] Fix error introduced in index script --- parasolr/management/commands/index.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/parasolr/management/commands/index.py b/parasolr/management/commands/index.py index ea4a1d5..e1990ab 100644 --- a/parasolr/management/commands/index.py +++ b/parasolr/management/commands/index.py @@ -150,6 +150,8 @@ def handle(self, *args, **kwargs): # index items requested if to_index: # list of objects already gathered + # items are not guaranteed to be the same subclass of Indexable, + # so we don't specify and use the base Indexable class count += self.index(to_index, progbar=progbar) else: @@ -159,7 +161,9 @@ def handle(self, *args, **kwargs): # index in chunks and update progress bar # pass in indexable class to ensure we use prefetching # and chunk size specific to that class - count += self.index(model, model.items_to_index(), progbar=progbar) + count += self.index( + model.items_to_index(), progbar=progbar, indexable=model + ) if progbar: progbar.finish() @@ -172,8 +176,11 @@ def handle(self, *args, **kwargs): # using format for comma-separated numbers self.stdout.write("Indexed {:,} item{}".format(count, pluralize(count))) - def index(self, indexable, index_data, progbar=None): + def index(self, index_data, progbar=None, indexable=None): 
"""Index an iterable into the configured solr""" + # if indexable subclass is not specified use the base class + if indexable is None: + indexable = Indexable try: # index in chunks and update progress bar if there is one return indexable.index_items(index_data, progbar=progbar) From 143ff1f31d6777345f97f7f7971f7580eea6be8e Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Tue, 21 Feb 2023 09:55:11 -0500 Subject: [PATCH 11/15] Set version to 0.9 --- parasolr/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parasolr/__init__.py b/parasolr/__init__.py index 9528ed6..ff89738 100644 --- a/parasolr/__init__.py +++ b/parasolr/__init__.py @@ -1,6 +1,6 @@ default_app_config = "parasolr.apps.ParasolConfig" -__version_info__ = (0, 8, 2, None) +__version_info__ = (0, 9, 0, None) # Dot-connect all but the last. Last is dash-connected if not None. __version__ = ".".join([str(i) for i in __version_info__[:-1]]) From a86a5fe3aa6a0fc9a53b132231380a14f859dcfb Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Tue, 21 Feb 2023 09:58:24 -0500 Subject: [PATCH 12/15] Update changelog for version 0.9 --- CHANGELOG.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 88bc662..b2acb6e 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,8 +6,11 @@ CHANGELOG 0.9 --- -* New class method `prep_index_chunk` on ``Indexable`` class, to support +* ``SolrQuerySet`` now supports Solr grouping functionality via new `group` + method and `GroupedResponse` +* New class method `prep_index_chunk` on ``Indexable`` class, to support prefetching related objects when iterating over Django querysets +* Include django view mixins in sphinx documentation 0.8.2 ----- From 160ecdee6411ca30ff86bae1b5a46611d4e90d46 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 24 Feb 2023 08:46:33 -0500 Subject: [PATCH 13/15] =?UTF-8?q?Revise=20todos=20=E2=80=94=20add=20notes,?= =?UTF-8?q?=20update=20docs=20to=20reflect=20status?= MIME-Version: 
1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.rst | 4 ++-- parasolr/query/aliased_queryset.py | 3 ++- parasolr/query/queryset.py | 11 ++++++----- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index b2acb6e..596a446 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,10 +6,10 @@ CHANGELOG 0.9 --- -* ``SolrQuerySet`` now supports Solr grouping functionality via new `group` +* ``SolrQuerySet`` now supports Solr grouping via new `group` method and `GroupedResponse` * New class method `prep_index_chunk` on ``Indexable`` class, to support - prefetching related objects when iterating over Django querysets + prefetching related objects when iterating over Django querysets for indexing * Include django view mixins in sphinx documentation 0.8.2 diff --git a/parasolr/query/aliased_queryset.py b/parasolr/query/aliased_queryset.py index 96e9700..ec601ca 100644 --- a/parasolr/query/aliased_queryset.py +++ b/parasolr/query/aliased_queryset.py @@ -104,7 +104,8 @@ def highlight(self, field: str, **kwargs) -> "AliasedSolrQuerySet": def group(self, field: str, **kwargs) -> "AliasedSolrQuerySet": """Extend :meth:`parasolr.query.queryset.SolrQuerySet.group` - to support using aliased field names in kwargs.""" + to support using aliased field names in kwargs. (Note that sorting + does not currently support aliased field names).""" field = self.field_aliases.get(field, field) # TODO: should we also reverse alias for sort option if specified? return super().group(field, **kwargs) diff --git a/parasolr/query/queryset.py b/parasolr/query/queryset.py index 2a89d83..3d7ae46 100644 --- a/parasolr/query/queryset.py +++ b/parasolr/query/queryset.py @@ -82,8 +82,8 @@ def get_response(self, **kwargs) -> List[dict]: query_opts = self.query_opts() query_opts.update(**kwargs) - # TODO: what do we do about the fact that Solr defaults - # to 10 rows? 
+ # NOTE: still need to work around Solr default of 10 rows + # see https://github.com/Princeton-CDH/parasolr/issues/43 # note that we're caching the result with override options here, # which may not always be the right thing to do ... @@ -99,6 +99,7 @@ def get_results(self, **kwargs) -> List[dict]: Query Solr and get the results for the current query and filter options. Populates result cache and returns the documents portion of the reponse. + (Note that this method is not currently compatible with grouping.) Returns: Solr response documents as a list of dictionaries. @@ -107,9 +108,9 @@ def get_results(self, **kwargs) -> List[dict]: response = self.get_response(**kwargs) # if there is a query error, result will not be set if response: - # TODO: need to handle result doc tranformatiion on grouped response. - # intentionally applying to .docs instead of .items to trigger - # an error if we try to use on grouped response + # NOTE: should probably handle result doc transformation on grouped responses.
+ # Intentionally applying to .docs instead of .items to trigger + # an error if anyone attempts to use this on a grouped response return [self.get_result_document(doc) for doc in self._result_cache.docs] return [] From a53311c2256273f8db8db09638f2e6d191e37d64 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 24 Feb 2023 10:14:27 -0500 Subject: [PATCH 14/15] Update python/django versions in test matrix --- .github/workflows/unit_tests.yml | 6 +++--- CHANGELOG.rst | 2 ++ README.rst | 2 +- setup.py | 4 ++-- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index d733c0b..2a3fb83 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -12,9 +12,9 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python: [3.6, 3.8] - solr: [8.6, 6.6] - django: [0, 2.2, 3.0, 3.1] + python: [3.8, 3.9] + solr: [8.6] + django: [0, 3.0, 3.1, 3.2] # We use service containers to avoid needing to set up a local copy of # mysql or postgres on the test runner instance. This syntax is similar to # the spec of a docker-compose file. For more, see: diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 596a446..e6eb477 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -11,6 +11,8 @@ CHANGELOG * New class method `prep_index_chunk` on ``Indexable`` class, to support prefetching related objects when iterating over Django querysets for indexing * Include django view mixins in sphinx documentation +* Dropped support for python 3.6; added python 3.9 +* Dropped support for Django 2.2; added Django 3.2 0.8.2 ----- diff --git a/README.rst b/README.rst index 01f4056..20ef802 100644 --- a/README.rst +++ b/README.rst @@ -57,7 +57,7 @@ configuration and indexing content. .. 
image:: https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336 :target: https://pycqa.github.io/isort/ -Currently tested against Python 3.6 and 3.8, Solr 6.6.5 and 8.6.2, and Django 2.2-3.1 and without Django. +Currently tested against Python 3.8 and 3.9, Solr 8.6.2, and Django 3.0-3.2 and without Django. Installation diff --git a/setup.py b/setup.py index 4efa365..ee69c7a 100644 --- a/setup.py +++ b/setup.py @@ -46,17 +46,17 @@ "Environment :: Web Environment", "Development Status :: 2 - Pre-Alpha", "Framework :: Django", - "Framework :: Django :: 2.2", "Framework :: Django :: 3.0", "Framework :: Django :: 3.1", + "Framework :: Django :: 3.2", "Framework :: Pytest", "Intended Audience :: Developers", "License :: OSI Approved :: Apache Software License", "Operating System :: OS Independent", "Programming Language :: Python", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", "Topic :: Internet :: WWW/HTTP", "Topic :: Database", ], From 68a099c15ac26a651b1af242628f2a3baec59ace Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 24 Feb 2023 10:23:45 -0500 Subject: [PATCH 15/15] Update mock for change to items/docs in solr response object --- CHANGELOG.rst | 1 + parasolr/query/tests/test_queryset.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index e6eb477..bce86c6 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -13,6 +13,7 @@ CHANGELOG * Include django view mixins in sphinx documentation * Dropped support for python 3.6; added python 3.9 * Dropped support for Django 2.2; added Django 3.2 +* No longer tested against Solr 6.6 0.8.2 ----- diff --git a/parasolr/query/tests/test_queryset.py b/parasolr/query/tests/test_queryset.py index 3fd3040..d113339 100644 --- a/parasolr/query/tests/test_queryset.py +++ b/parasolr/query/tests/test_queryset.py @@ -767,7 +767,7 @@ def 
test_get_item(self): # simulate result cache already populated sqs._result_cache = Mock() - sqs._result_cache.docs = [1, 2, 3, 4, 5] + sqs._result_cache.items = [1, 2, 3, 4, 5] # single item assert sqs[0] == 1 assert sqs[1] == 2