Skip to content

Commit

Permalink
Upgraded to Django 4.2
Browse files Browse the repository at this point in the history
* Output format of MVT mapbox tiles changed to newer format.
* Fixed Http404 error, should be NotFound anyway in DRF views.
* Fixed ChunkedQuerySetIterator, since Django 4.2 already does
  parts of this internally now.
  • Loading branch information
vdboor committed Jul 18, 2024
1 parent aad92f3 commit 5cfb5e3
Show file tree
Hide file tree
Showing 10 changed files with 2,403 additions and 2,063 deletions.
4 changes: 2 additions & 2 deletions src/dso_api/dynamic_api/views/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ class (namely the :class:`~dso_api.dynamic_api.views.DynamicApiViewSet` base cla
from functools import cached_property

from django.db import models
from django.http import Http404
from django.utils.translation import gettext as _
from rest_framework import viewsets
from rest_framework.exceptions import NotFound
from schematools.contrib.django.models import DynamicModel

from dso_api.dynamic_api import filters, permissions, serializers
Expand Down Expand Up @@ -115,7 +115,7 @@ def get_object(self) -> DynamicModel:
# Only retrieve the correct temporal object, unless filters change this
obj = queryset.order_by().first() # reverse ordering already applied
if obj is None:
raise Http404(
raise NotFound(
_("No %(verbose_name)s found matching the query")
% {"verbose_name": queryset.model._meta.verbose_name}
)
Expand Down
72 changes: 35 additions & 37 deletions src/requirements.in
Original file line number Diff line number Diff line change
@@ -1,58 +1,56 @@
Django == 3.2.25
django-environ == 0.9.0
django-cors-headers == 3.13.0
Django == 4.2.14
django-environ == 0.11.2
django-cors-headers == 4.4.0
django-healthchecks == 1.5.0
django-postgres-unlimited-varchar == 1.1.2
django-redis == 5.4.0
django-gisserver == 1.4.0
django-vectortiles == 0.2.0
djangorestframework == 3.14.0
djangorestframework-csv == 2.1.1
djangorestframework == 3.15.2
djangorestframework-csv == 3.0.2
djangorestframework-gis == 1.0
amsterdam-schema-tools[django] == 6.0
datadiensten-apikeyclient == 0.5.0
amsterdam-schema-tools[django] == 6.0.1
datadiensten-apikeyclient == 0.6.0
datapunt-authorization-django==1.3.3
drf-spectacular == 0.25.1
Geoalchemy2 == 0.11.1
importlib-metadata == 6.0.0
importlib-resources == 5.10.2
jsonschema == 4.17.3
lru_dict == 1.1.8
Markdown == 3.4.3
drf-spectacular == 0.27.2
Geoalchemy2 == 0.15.2
importlib-metadata == 8.0.0
importlib-resources == 6.4.0
jsonschema == 4.23.0
lru_dict == 1.3.0
Markdown == 3.6
more-ds == 0.0.6
more-itertools == 9.1.0
openapi-spec-validator == 0.5.2
opencensus-ext-azure >= 1.1.*
opencensus-ext-django >= 0.8.*
opencensus-ext-logging >= 0.1.*
orjson == 3.9.15
more-itertools == 10.3.0
openapi-spec-validator == 0.7.1
opencensus-ext-azure >= 1.1.13
opencensus-ext-django >= 0.8
opencensus-ext-logging >= 0.1.1
orjson == 3.10.6
python-string-utils == 1.0.0
requests == 2.32.2
sentry-sdk == 1.14.0
urllib3 == 1.26.19
requests == 2.32.3
sentry-sdk == 2.10.0
urllib3 == 2.2.2
urllib3-mock == 0.3.3
whitenoise == 6.4.0
zipp == 3.19.1
whitenoise == 6.7.0
zipp == 3.19.2

# Packaging
pur == 7.0.0
pur == 7.3.2

# Monitoring
uwsgitop == 0.11
uwsgitop == 0.12
uwsgi-readiness-check == 0.2.0

# Testing
bandit == 1.7.4
cryptography >= 41.0.2
flake8 == 5.0.4
bandit == 1.7.9
cryptography >= 42.0.8
flake8 == 7.1.0
flake8-blind-except == 0.2.1
flake8-colors == 0.1.9
flake8-debugger == 4.1.2
flake8-raise == 0.0.5
mapbox-vector-tile == 1.2.1
pytest == 7.2.1
pytest-django == 4.5.2
pytest-cov == 4.0.0
python-owasp-zap-v2.4 == 0.0.21

protobuf ~= 3.20.1 # for mapbox-vector-tile
mapbox-vector-tile == 2.1.0
pytest == 8.2.2
pytest-django == 4.8.0
pytest-cov == 5.0.0
python-owasp-zap-v2.4 == 0.1.0
2,039 changes: 1,103 additions & 936 deletions src/requirements.txt

Large diffs are not rendered by default.

18 changes: 9 additions & 9 deletions src/requirements_dev.in
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
-r ./requirements.in

black==24.3.0
ruff==0.5.0
pytest-sugar==0.9.6
termcolor==2.0.1
pre-commit==2.20.0
django-debug-toolbar==3.7.0
django-extensions==3.2.1
pip-tools==7.1.0
tomli==1.1.0
black==24.4.2
ruff==0.5.2
pytest-sugar==1.0.0
termcolor==2.4.0
pre-commit==3.7.1
django-debug-toolbar==4.4.6
django-extensions==3.2.3
pip-tools==7.4.1
tomli==2.0.1
2,252 changes: 1,205 additions & 1,047 deletions src/requirements_dev.txt

Large diffs are not rendered by default.

54 changes: 35 additions & 19 deletions src/rest_framework_dso/iterators.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from itertools import islice
from typing import TypeVar

from django.db import models
from django.db import connections, models
from django.db.models.query import QuerySet
from lru import LRU

Expand All @@ -40,19 +40,13 @@
class ChunkedQuerySetIterator(Iterable[M]):
"""An optimal strategy to perform ``prefetch_related()`` on large datasets.
It fetches data from the queryset in chunks,
and performs ``prefetch_related()`` behavior on each chunk.
Originally, this was here as Django didn't allow combining ``QuerySet.iterator()``
with ``prefetch_related()``. That support landed in Django 4.2.
Django's ``QuerySet.prefetch_related()`` works by loading the whole queryset into memory,
and performing an analysis of the related objects to fetch. When working on large datasets,
this is very inefficient as more memory is consumed. Instead, ``QuerySet.iterator()``
is preferred here as it returns instances while reading them. Nothing is stored in memory.
Hence, both approaches are fundamentally incompatible. This class performs a
mixed strategy: load a chunk, and perform prefetches for that particular batch.
As extra performance benefit, a local cache avoids prefetching the same records
again when the next chunk is analysed. It has a "least recently used" cache to avoid
flooding the caches when foreign keys constantly point to different unique objects.
This code is still relevant, as it also adds an extra performance benefit.
It keeps a local cache of foreign-key objects, to avoid prefetching the same records
again when the next chunk is analysed. This is done using a "least recently used" dict
so the cache won't be flooded when foreign keys constantly point to different unique objects.
"""

def __init__(self, queryset: models.QuerySet, chunk_size=None, sql_chunk_size=None):
Expand All @@ -69,7 +63,7 @@ def __init__(self, queryset: models.QuerySet, chunk_size=None, sql_chunk_size=No

def __iter__(self):
# Using iter() ensures the ModelIterable is resumed with the next chunk.
qs_iter = iter(self.queryset.iterator(chunk_size=self.sql_chunk_size))
qs_iter = iter(self._get_queryset_iterator())
chunk_id = 0

# Keep fetching chunks
Expand All @@ -81,6 +75,28 @@ def __iter__(self):

yield from instances

def _get_queryset_iterator(self) -> Iterable:
"""The body of queryset.iterator(), while circumventing prefetching."""
# The old code did `return self.queryset.iterator(chunk_size=self.sql_chunk_size)`
# However, Django 4 supports using prefetch_related() with iterator() in that scenario.
#
# This code is the core of Django's QuerySet.iterator() that only produces the iteration,
# without any prefetches. Those are added by this class instead.
use_chunked_fetch = not connections[self.queryset.db].settings_dict.get(
"DISABLE_SERVER_SIDE_CURSORS"
)
iterable = self.queryset._iterable_class(
self.queryset, chunked_fetch=use_chunked_fetch, chunk_size=self.sql_chunk_size
)
if isinstance(self.queryset, ObservableQuerySet):
# As the _iterator() and __iter__() is bypassed here,
# the logic of the ObservableQuerySet needs to be restored.
# Otherwise, it will return the sentinel item which the DSOPage
# uses to detect whether there is a next page.
iterable = self.queryset.wrap_iterator(iterable)

yield from iterable

def _add_prefetches(self, instances: list[M], chunk_id):
"""Merge the prefetched objects for this batch with the model instances."""
if self._fk_caches:
Expand Down Expand Up @@ -198,22 +214,22 @@ def _clone(self, *args, **kwargs):
clone._item_callbacks = self._item_callbacks.copy()
return clone

def iterator(self, *args, **kwargs):
def _iterator(self, *args, **kwargs):
"""Return observable iterator and add observer.
Wraps an observable iterator around the iterator
returned by the base class.
"""
iterator = super().iterator(*args, **kwargs)
return self._wrap_iterator(iterator)
iterator = super()._iterator(*args, **kwargs)
return self.wrap_iterator(iterator)

def __iter__(self):
"""Return observable iterator and add observer.
Wraps an observable iterator around the iterator
returned by the base class.
"""
return self._wrap_iterator(super().__iter__())
return self.wrap_iterator(super().__iter__())

def _wrap_iterator(self, iterator: Iterable) -> ObservableIterator:
def wrap_iterator(self, iterator: Iterable) -> ObservableIterator:
"""Wrap an iterator inside an ObservableIterator"""
iterator = ObservableIterator(iterator)
iterator.add_observer(self._item_observer)
Expand Down
2 changes: 1 addition & 1 deletion src/rest_framework_dso/paginator.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def page(self, number):
# One additional sentinel object, is given to the page.
# This object should not be rendered, but it allows the page
# to detect whether more items exist beyond it and hence whether a next page exists.
sentinel = 1 # thrown away in DSOPage using next()
sentinel = 1
return self._get_page(self.object_list[bottom : top + sentinel], number, self)

def _get_page(self, *args, **kwargs):
Expand Down
2 changes: 1 addition & 1 deletion src/tests/test_dynamic_api/test_serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,7 @@ def test_display_title_present(drf_request, fietspaaltjes_model, fietspaaltjes_d
context={"request": drf_request, "view": to_serializer_view(fietspaaltjes_model)},
)

assert "'title', 'reference for DISPLAY FIELD'" in str(fietsplaatjes_serializer.data)
assert "'title': 'reference for DISPLAY FIELD'" in str(fietsplaatjes_serializer.data)

@staticmethod
def test_no_display_title_present(
Expand Down
14 changes: 6 additions & 8 deletions src/tests/test_dynamic_api/test_views_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2298,20 +2298,18 @@ def test_paginated_list(
assert response["Content-Type"] == expected_type # Test before reading stream
assert response.status_code == 200, response.getvalue()
assert isinstance(response, StreamingResponse)
assert response["Content-Type"] == expected_type # And test after reading

data = decoder(response.getvalue())
assert response["Content-Type"] == expected_type # And test after reading

if format != "json" and page_size_param == "page_size":
# Handling of this synonym is broken: AB#24678.
return

assert data == make_expected(self, page_num, page_size_param)
pytest.xfail("Handling of this synonym is broken: AB#24678.")

# Paginator was triggered
assert response["X-Pagination-Page"] == str(page_num)
assert response["X-Pagination-Limit"] == "4"

assert data == make_expected(self, page_num, page_size_param)

# proves middleware detected streaming response, and didn't break it:
assert "Content-Length" not in response

Expand Down Expand Up @@ -2487,8 +2485,8 @@ def test_detail_404(self, format, api_client, afval_dataset, filled_router):
data = json.loads(response.getvalue())
assert data == {
"type": "urn:apiexception:not_found",
"title": "No Containers found matching the query",
"detail": "Not found.",
"title": "Not found.",
"detail": "No Containers found matching the query",
"status": 404,
}

Expand Down
9 changes: 6 additions & 3 deletions src/tests/test_dynamic_api/test_views_mvt.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ def test_mvt_content(api_client, afval_container_model, afval_cluster, filled_ro
"default": {
"extent": 4096,
"version": 2,
"type": "FeatureCollection",
"features": [
{
"geometry": {"type": "Point", "coordinates": [1928, 2558]},
Expand All @@ -151,7 +152,7 @@ def test_mvt_content(api_client, afval_container_model, afval_cluster, filled_ro
"datum_leegmaken": "2021-01-03 12:13:14+01",
},
"id": 0,
"type": 1,
"type": "Feature",
}
],
}
Expand All @@ -170,12 +171,13 @@ def test_mvt_content(api_client, afval_container_model, afval_cluster, filled_ro
"default": {
"extent": 4096,
"version": 2,
"type": "FeatureCollection",
"features": [
{
"geometry": {"type": "Point", "coordinates": [3825, 1344]},
"properties": {},
"id": 0,
"type": 1,
"type": "Feature",
}
],
}
Expand Down Expand Up @@ -211,12 +213,13 @@ def test_mvt_model_auth(api_client, geometry_auth_model, fetch_auth_token, fille
"default": {
"extent": 4096,
"version": 2,
"type": "FeatureCollection",
"features": [
{
"geometry": {"type": "Point", "coordinates": [1928, 2558]},
"id": 0,
"properties": {"id": 1, "metadata": "secret"},
"type": 1,
"type": "Feature",
}
],
}
Expand Down

0 comments on commit 5cfb5e3

Please sign in to comment.