Skip to content

Commit

Permalink
Merge branch 'release/0.9' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
rlskoeser committed Feb 24, 2023
2 parents 716ad22 + 68a099c commit 88f6cc1
Show file tree
Hide file tree
Showing 15 changed files with 328 additions and 53 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/unit_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python: [3.6, 3.8]
solr: [8.6, 6.6]
django: [0, 2.2, 3.0, 3.1]
python: [3.8, 3.9]
solr: [8.6]
django: [0, 3.0, 3.1, 3.2]
# We use service containers to avoid needing to set up a local copy of
# mysql or postgres on the test runner instance. This syntax is similar to
# the spec of a docker-compose file. For more, see:
Expand Down
12 changes: 12 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,18 @@ CHANGELOG
0.9
---

* ``SolrQuerySet`` now supports Solr grouping via new ``group``
method and ``GroupedResponse``
* New class method ``prep_index_chunk`` on ``Indexable`` class, to support
prefetching related objects when iterating over Django querysets for indexing
* Include django view mixins in sphinx documentation
* Dropped support for python 3.6; added python 3.9
* Dropped support for Django 2.2; added Django 3.2
* No longer tested against Solr 6.6

0.8.2
-----

* When subclassing ``SolrQuerySet``, result documents can now be customized by extending ``get_result_document``

0.8.1
Expand Down
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ configuration and indexing content.
.. image:: https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336
:target: https://pycqa.github.io/isort/

Currently tested against Python 3.6 and 3.8, Solr 6.6.5 and 8.6.2, and Django 2.2-3.1 and without Django.
Currently tested against Python 3.8 and 3.9, Solr 8.6.2, and Django 3.0-3.2 and without Django.


Installation
Expand Down
2 changes: 1 addition & 1 deletion parasolr/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
default_app_config = "parasolr.apps.ParasolConfig"

__version_info__ = (0, 9, 0, "dev")
__version_info__ = (0, 9, 0, None)

# Dot-connect all but the last. Last is dash-connected if not None.
__version__ = ".".join([str(i) for i in __version_info__[:-1]])
Expand Down
10 changes: 10 additions & 0 deletions parasolr/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,15 @@ def total_to_index(cls):
except AttributeError:
raise NotImplementedError

@classmethod
def prep_index_chunk(cls, chunk):
    """Hook for additional processing on a chunk of items being indexed.

    Intended to be overridden, e.g. to add prefetching to a chunk when
    iterating over a Django QuerySet: since indexing uses an iterator,
    any prefetching configured in `items_to_index` is ignored.

    The base implementation does nothing and hands the chunk back
    unchanged.
    """
    return chunk

def index_id(self):
"""Solr identifier. By default, combines :meth:`index item_type`
and :attr:`id` with :attr:`ID_SEPARATOR`."""
Expand Down Expand Up @@ -168,6 +177,7 @@ def index_items(cls, items, progbar=None):
count = 0
while chunk:
# call index data method if present; otherwise assume item is dict
chunk = cls.prep_index_chunk(chunk)
cls.solr.update.index(
[i.index_data() if hasattr(i, "index_data") else i for i in chunk]
)
Expand Down
15 changes: 12 additions & 3 deletions parasolr/management/commands/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,14 +150,20 @@ def handle(self, *args, **kwargs):
# index items requested
if to_index:
# list of objects already gathered
# items are not guaranteed to be the same subclass of Indexable,
# so we don't specify and use the base Indexable class
count += self.index(to_index, progbar=progbar)

else:
# iterate over indexables by type and index if requested
for name, model in self.indexables.items():
if self.options["index"] in [name, "all"]:
# index in chunks and update progress bar
count += self.index(model.items_to_index(), progbar=progbar)
# pass in indexable class to ensure we use prefetching
# and chunk size specific to that class
count += self.index(
model.items_to_index(), progbar=progbar, indexable=model
)

if progbar:
progbar.finish()
Expand All @@ -170,11 +176,14 @@ def handle(self, *args, **kwargs):
# using format for comma-separated numbers
self.stdout.write("Indexed {:,} item{}".format(count, pluralize(count)))

def index(self, index_data, progbar=None):
def index(self, index_data, progbar=None, indexable=None):
"""Index an iterable into the configured solr"""
# if indexable subclass is not specified use the base class
if indexable is None:
indexable = Indexable
try:
# index in chunks and update progress bar if there is one
return Indexable.index_items(index_data, progbar=progbar)
return indexable.index_items(index_data, progbar=progbar)
except requests.exceptions.ConnectionError as err:
# bail out if we error connecting to Solr
raise CommandError(err)
Expand Down
1 change: 1 addition & 0 deletions parasolr/pytest_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ def get_mock_solr_queryset(spec=SolrQuerySet, extra_methods=[]):
"query",
"only",
"also",
"group",
"highlight",
"raw_query_parameters",
"all",
Expand Down
8 changes: 8 additions & 0 deletions parasolr/query/aliased_queryset.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,14 @@ def highlight(self, field: str, **kwargs) -> "AliasedSolrQuerySet":
field = self.field_aliases.get(field, field)
return super().highlight(field, **kwargs)

def group(self, field: str, **kwargs) -> "AliasedSolrQuerySet":
    """Extend :meth:`parasolr.query.queryset.SolrQuerySet.group`
    so that the group field can be given by its alias. A field name
    with no alias is used as-is. Keyword options are passed through
    unchanged, so aliased field names in a sort option are not
    currently supported."""
    # TODO: should we also reverse alias for sort option if specified?
    solr_field = self.field_aliases.get(field, field)
    return super().group(solr_field, **kwargs)

def get_facets(self) -> Dict[str, int]:
"""Extend :meth:`parasolr.query.queryset.SolrQuerySet.get_facets`
to use aliased field names for facet and range facet keys."""
Expand Down
76 changes: 67 additions & 9 deletions parasolr/query/queryset.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,14 @@ class SolrQuerySet:
filter_qs = []
field_list = []
highlight_fields = []
group_field = None
facet_field_list = []
stats_field_list = []
range_facet_fields = []
facet_opts = {}
stats_opts = {}
highlight_opts = {}
group_opts = {}
raw_params = {}

#: by default, combine search queries with AND
Expand All @@ -59,7 +61,7 @@ def __init__(self, solr: SolrClient):
# convert search operator into form needed for combining queries
self._search_op = " %s " % self.default_search_operator

def get_results(self, **kwargs) -> List[dict]:
def get_response(self, **kwargs) -> List[dict]:
"""
Query Solr and get the results for the current query and filter
options. Populates result cache and returns the documents portion
Expand All @@ -72,16 +74,43 @@ def get_results(self, **kwargs) -> List[dict]:
# if query options have changed?
# For now, always query.

# if cached and no override query args are specified,
# return existing cached result
if self._result_cache and not kwargs:
return self._result_cache

query_opts = self.query_opts()
query_opts.update(**kwargs)
# TODO: what do we do about the fact that Solr defaults
# to 10 rows?

# NOTE: still need to work around Solr default of 10 rows
# see https://github.com/Princeton-CDH/parasolr/issues/43

# note that we're caching the result with override options here,
# which may not always be the right thing to do ...
self._result_cache = self.solr.query(**query_opts)

# NOTE: django templates choke on AttrDict because it is
# callable; using dictionary response instead
self._result_cache = self.solr.query(**query_opts)

return self._result_cache

def get_results(self, **kwargs) -> List[dict]:
"""
Query Solr and get the results for the current query and filter
options. Populates result cache and returns the documents portion
of the response.
(Note that this method is not currently compatible with grouping.)
Returns:
Solr response documents as a list of dictionaries.
"""
# get query response
response = self.get_response(**kwargs)
# if there is a query error, result will not be set
if self._result_cache:
if response:
# NOTE: should probably handle result doc transformation on grouped responses.
# Intentionally applying to .docs instead of .items to trigger
# an error if anyone attempts to use this on a grouped response
return [self.get_result_document(doc) for doc in self._result_cache.docs]
return []

Expand All @@ -99,6 +128,14 @@ def _set_highlighting_opts(self, query_opts: Dict) -> None:
# (prefixes added in highlight methods)
query_opts.update(self.highlight_opts)

def _set_group_opts(self, query_opts: Dict) -> None:
"""Configure grouping atrtibutes on query_opts. Modifies dictionary
directly."""
if self.group_field:
query_opts.update({"group": True, "group.field": self.group_field})
# any other group options can be added as-is
query_opts.update(self.group_opts)

def _set_faceting_opts(self, query_opts: Dict) -> None:
"""Configure faceting attributes directly on query_opts. Modifies
dictionary directly."""
Expand Down Expand Up @@ -146,6 +183,9 @@ def query_opts(self) -> Dict[str, str]:
# highlighting
self._set_highlighting_opts(query_opts)

# grouping
self._set_group_opts(query_opts)

# faceting
self._set_faceting_opts(query_opts)

Expand Down Expand Up @@ -528,6 +568,24 @@ def highlight(self, field: str, **kwargs) -> "SolrQuerySet":

return qs_copy

def group(self, field: str, **kwargs) -> "SolrQuerySet":
    """Configure grouping. Takes arbitrary Solr group
    parameters and adds the `group.` prefix to them. Example use,
    grouping on a `group_id` field, limiting to three results per group,
    and sorting group members by an `order` field::

        queryset.group('group_id', limit=3, sort='order asc')

    Args:
        field: name of the field to group on (a single group
            field is assumed for now).
        **kwargs: group options, given without the `group.` prefix.

    Returns:
        A clone of this queryset with the group field and options set.
    """
    qs_copy = self._clone()
    # store group field and grouping options
    # for now, assuming single group field
    qs_copy.group_field = field
    qs_copy.group_opts.update(
        {"group.%s" % opt: value for opt, value in kwargs.items()}
    )

    return qs_copy

def raw_query_parameters(self, **kwargs) -> "SolrQuerySet":
"""Add abritrary raw parameters to be included in the query
request, e.g. for variables referenced in join or field queries.
Expand All @@ -538,9 +596,7 @@ def raw_query_parameters(self, **kwargs) -> "SolrQuerySet":

def get_highlighting(self) -> Dict[str, Dict[str, List]]:
"""Return the highlighting portion of the Solr response."""
if not self._result_cache:
self.get_results()
return self._result_cache.highlighting
return self.get_response().highlighting

def all(self) -> "SolrQuerySet":
"""Return a new queryset that is a copy of the current one."""
Expand All @@ -565,6 +621,7 @@ def _clone(self) -> "SolrQuerySet":
qs_copy.start = self.start
qs_copy.stop = self.stop
qs_copy.highlight_fields = list(self.highlight_fields)
qs_copy.group_field = self.group_field

# set copies of list and dict attributes
qs_copy.search_qs = list(self.search_qs)
Expand All @@ -573,6 +630,7 @@ def _clone(self) -> "SolrQuerySet":
qs_copy.field_list = list(self.field_list)
qs_copy.range_facet_fields = list(self.range_facet_fields)
qs_copy.highlight_opts = dict(self.highlight_opts)
qs_copy.group_opts = dict(self.group_opts)
qs_copy.raw_params = dict(self.raw_params)
qs_copy.facet_field_list = list(self.facet_field_list)
qs_copy.facet_opts = dict(self.facet_opts)
Expand Down Expand Up @@ -613,7 +671,7 @@ def __getitem__(self, k):
# if the result cache is already populated,
# return the requested index or slice
if self._result_cache:
return self._result_cache.docs[k]
return self._result_cache.items[k]

qs_copy = self._clone()

Expand Down
9 changes: 9 additions & 0 deletions parasolr/query/tests/test_aliased_queryset.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,15 @@ def test_highlight(self, mock_highlight):
self.mysqs.highlight("foo_b")
mock_highlight.assert_called_with("foo_b")

@patch("parasolr.query.queryset.SolrQuerySet.group")
def test_group(self, mock_group):
    # an aliased field name is translated before the parent method is called
    unaliased_name = self.mysqs.field_aliases["name"]
    self.mysqs.group("name")
    mock_group.assert_called_with(unaliased_name)

    # a field name with no alias is passed through unchanged
    self.mysqs.group("foo_b")
    mock_group.assert_called_with("foo_b")

@patch("parasolr.query.queryset.SolrQuerySet.get_facets")
def test_get_facets(self, mock_get_facets):
sample_facet_result = {
Expand Down
34 changes: 21 additions & 13 deletions parasolr/query/tests/test_queryset.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ def test_query_opts(self):
"facet",
"stats",
"stats.field",
"group",
"group.field",
]:
assert opt not in query_opts

Expand Down Expand Up @@ -97,6 +99,16 @@ def test_query_opts(self):
assert query_opts["facet.range"] == sqs.range_facet_fields
assert range_facet_opt in query_opts

def test_query_opts_group(self):
    """Group field and group options should be included in query opts."""
    mocksolr = Mock(spec=SolrClient)
    sqs = SolrQuerySet(mocksolr)
    sqs.group_field = "group_id"
    sqs.group_opts = {"group.limit": 3}
    query_opts = sqs.query_opts()
    # grouping is switched on with a literal boolean; check identity
    # rather than comparing with == True
    assert query_opts["group"] is True
    assert query_opts["group.field"] == "group_id"
    # other group options are passed through as-is
    assert query_opts["group.limit"] == 3

def test_query(self):
mocksolr = Mock(spec=SolrClient)
mocksolr.query.return_value.docs = []
Expand Down Expand Up @@ -494,6 +506,14 @@ def test_highlight(self):
assert sqs.highlight_fields == []
assert sqs.highlight_opts == {}

def test_group(self):
    """Grouping should be configured on a copy of the queryset."""
    mocksolr = Mock(spec=SolrClient)
    sqs = SolrQuerySet(mocksolr)
    # group field plus one option; option key gets the group. prefix
    group_qs = sqs.group("content", limit=3)
    assert group_qs.group_field == "content"
    assert group_qs.group_opts == {"group.limit": 3}
    # original queryset should be unchanged
    assert sqs.group_field is None
    assert sqs.group_opts == {}

def test_raw_query_parameters(self):
mocksolr = Mock(spec=SolrClient)
sqs = SolrQuerySet(mocksolr)
Expand Down Expand Up @@ -531,18 +551,6 @@ def test_get_highlighting(self):
sqs._result_cache = Mock(highlighting=mock_highlights)
assert sqs.get_highlighting() == mock_highlights

# should populate cache if empty
sqs._result_cache = None
with patch.object(sqs, "get_results") as mock_get_results:

def set_result_cache():
sqs._result_cache = Mock()

mock_get_results.side_effect = set_result_cache

sqs.get_highlighting()
mock_get_results.assert_called_with()

def test_all(self):
mocksolr = Mock(spec=SolrClient)
sqs = SolrQuerySet(mocksolr)
Expand Down Expand Up @@ -759,7 +767,7 @@ def test_get_item(self):

# simulate result cache already populated
sqs._result_cache = Mock()
sqs._result_cache.docs = [1, 2, 3, 4, 5]
sqs._result_cache.items = [1, 2, 3, 4, 5]
# single item
assert sqs[0] == 1
assert sqs[1] == 2
Expand Down
Loading

0 comments on commit 88f6cc1

Please sign in to comment.