diff --git a/api/odl.py b/api/odl.py index dae53c0ae0..6172a751e1 100644 --- a/api/odl.py +++ b/api/odl.py @@ -1,68 +1,56 @@ import datetime -import dateutil import json import uuid -from flask_babel import lazy_gettext as _ -import urllib.parse -from collections import defaultdict -import flask -from flask import Response -import feedparser -from lxml import etree -from .problem_details import NO_LICENSES from io import StringIO -import re -from uritemplate import URITemplate +import dateutil +import feedparser +import flask +from flask import url_for +from flask_babel import lazy_gettext as _ +from lxml import etree from sqlalchemy.sql.expression import or_ +from uritemplate import URITemplate -from core.opds_import import ( - OPDSXMLParser, - OPDSImporter, - OPDSImportMonitor, -) -from core.monitor import ( - CollectionMonitor, - TimelineMonitor, +from core import util +from core.analytics import Analytics +from core.metadata_layer import ( + CirculationData, + FormatData, + LicenseData, + TimestampData, ) from core.model import ( Collection, ConfigurationSetting, - Credential, DataSource, DeliveryMechanism, Edition, ExternalIntegration, Hold, Hyperlink, - Identifier, - IntegrationClient, LicensePool, Loan, MediaTypes, RightsStatus, Session, - create, get_one, get_one_or_create, + Representation) +from core.monitor import ( + CollectionMonitor, + IdentifierSweepMonitor) +from core.opds_import import ( + OPDSXMLParser, + OPDSImporter, + OPDSImportMonitor, ) -from core.metadata_layer import ( - CirculationData, - FormatData, - IdentifierData, - LicenseData, - TimestampData, -) -from .circulation import ( - BaseCirculationAPI, - LoanInfo, - FulfillmentInfo, - HoldInfo, +from core.testing import ( + DatabaseTest, + MockRequestsResponse, ) -from core.analytics import Analytics from core.util.datetime_helpers import ( utc_now, - strptime_utc, ) from core.util.http import ( HTTP, @@ -70,14 +58,16 @@ RemoteIntegrationException, ) from core.util.string_helpers import base64 -from flask import url_for -from core.testing import ( - DatabaseTest, - MockRequestsResponse, +from .circulation import ( + BaseCirculationAPI, + LoanInfo, + FulfillmentInfo, + HoldInfo, ) from .circulation_exceptions import * from .shared_collection import BaseSharedCollectionAPI + class ODLAPI(BaseCirculationAPI, BaseSharedCollectionAPI): """ODL (Open Distribution to Libraries) is a specification that allows libraries to manage their own loans and holds. It offers a deeper level @@ -782,6 +772,7 @@ class ODLXMLParser(OPDSXMLParser): NAMESPACES = dict(OPDSXMLParser.NAMESPACES, odl="http://opds-spec.org/odl") + class ODLImporter(OPDSImporter): """Import information and formats from an ODL feed. @@ -795,6 +786,35 @@ class ODLImporter(OPDSImporter): # about the license. LICENSE_INFO_DOCUMENT_MEDIA_TYPE = 'application/vnd.odl.info+json' + _skip_expired_items = True + + def __init__( + self, + _db, + collection, + data_source_name=None, + identifier_mapping=None, + http_get=None, + metadata_client=None, + content_modifier=None, + map_from_collection=None, + mirrors=None, + skip_expired_items=True + ): + super(ODLImporter, self).__init__( + _db, + collection, + data_source_name, + identifier_mapping, + http_get, + metadata_client, + content_modifier, + map_from_collection, + mirrors + ) + + ODLImporter._skip_expired_items = skip_expired_items + @classmethod def _detail_for_elementtree_entry(cls, parser, entry_tag, feed_url=None, do_get=None): do_get = do_get or Representation.cautious_http_get @@ -887,8 +907,15 @@ def _detail_for_elementtree_entry(cls, parser, entry_tag, feed_url=None, do_get= if terms: concurrent_checkouts = subtag(terms[0], "odl:concurrent_checkouts") expires = subtag(terms[0], "odl:expires") + if expires: - expires = dateutil.parser.parse(expires) + expires = util.datetime_helpers.to_utc(dateutil.parser.parse(expires)) + + if ODLImporter._skip_expired_items: + now = util.datetime_helpers.utc_now() + + if expires <= now: + continue licenses_owned += int(concurrent_checkouts or 0) licenses_available += int(available_checkouts or 0) @@ -914,6 +941,7 @@ def _detail_for_elementtree_entry(cls, parser, entry_tag, feed_url=None, do_get= data['circulation']['licenses_available'] = licenses_available return data + class ODLImportMonitor(OPDSImportMonitor): """Import information from an ODL feed.""" PROTOCOL = ODLImporter.NAME @@ -1040,6 +1068,27 @@ def __init__(self, _db, collection): self.base_url = collection.external_account_id + @staticmethod + def _parse_feed_from_response(response): + """Parse ODL (Atom) feed from the HTTP response. + + :param response: HTTP response + :type response: requests.Response + + :return: Parsed ODL (Atom) feed + :rtype: dict + """ + response_content = response.content + + if isinstance(response_content, bytes): + response_content = response_content.decode("utf-8") + elif not isinstance(response_content, str): + raise ValueError("Response content must be a string or byte-encoded value") + + feed = feedparser.parse(response_content) + + return feed + def internal_format(self, delivery_mechanism): """Each consolidated copy is only available in one format, so we don't need a mapping to internal formats. @@ -1091,7 +1140,8 @@ def checkout(self, patron, pin, licensepool, internal_format): hold_info_response = self._get(hold.external_identifier) except RemoteIntegrationException as e: raise CannotLoan() - feed = feedparser.parse(str(hold_info_response.content)) + + feed = self._parse_feed_from_response(hold_info_response) entries = feed.get("entries") if len(entries) < 1: raise CannotLoan() @@ -1117,7 +1167,8 @@ def checkout(self, patron, pin, licensepool, internal_format): elif response.status_code == 404: if hasattr(response, 'json') and response.json().get('type', '') == NO_LICENSES.uri: raise NoLicenses() - feed = feedparser.parse(str(response.content)) + + feed = self._parse_feed_from_response(response) entries = feed.get("entries") if len(entries) < 1: raise CannotLoan() @@ -1181,7 +1232,8 @@ def checkin(self, patron, pin, licensepool): raise CannotReturn() if response.status_code == 404: raise NotCheckedOut() - feed = feedparser.parse(str(response.content)) + + feed = self._parse_feed_from_response(response) entries = feed.get("entries") if len(entries) < 1: raise CannotReturn() @@ -1286,7 +1338,8 @@ def release_hold(self, patron, pin, licensepool): raise CannotReleaseHold() if response.status_code == 404: raise NotOnHold() - feed = feedparser.parse(str(response.content)) + + feed = self._parse_feed_from_response(response) entries = feed.get("entries") if len(entries) < 1: raise CannotReleaseHold() @@ -1325,7 +1378,7 @@ def patron_activity(self, patron, pin): if response.status_code == 404: # 404 is returned when the loan has been deleted. Leave this loan out of the result. continue - feed = feedparser.parse(str(response.content)) + feed = self._parse_feed_from_response(response) entries = feed.get("entries") if len(entries) < 1: raise CirculationException() @@ -1354,7 +1407,7 @@ def patron_activity(self, patron, pin): if response.status_code == 404: # 404 is returned when the hold has been deleted. Leave this hold out of the result. continue - feed = feedparser.parse(str(response.content)) + feed = self._parse_feed_from_response(response) entries = feed.get("entries") if len(entries) < 1: raise CirculationException() @@ -1518,3 +1571,39 @@ def _get(self, url, patron=None, headers=None, allowed_response_codes=None): self.request_args.append((patron, headers, allowed_response_codes)) response = self.responses.pop() return HTTP._process_response(url, response, allowed_response_codes=allowed_response_codes) + + +class ODLExpiredItemsReaper(IdentifierSweepMonitor): + """Check for books that are in the local collection but have left our + Overdrive collection. + """ + SERVICE_NAME = "ODL Expired Items Reaper" + PROTOCOL = ODLAPI.NAME + + def __init__(self, _db, collection): + super(ODLExpiredItemsReaper, self).__init__(_db, collection) + + def process_item(self, identifier): + for licensepool in identifier.licensed_through: + licenses_owned = licensepool.licenses_owned + licenses_available = licensepool.licenses_available + + for license in licensepool.licenses: + if license.is_expired: + licenses_owned -= 1 + licenses_available -= 1 + + if licenses_owned != licensepool.licenses_owned or licenses_available != licensepool.licenses_available: + licenses_owned = max(licenses_owned, 0) + licenses_available = max(licenses_available, 0) + + circulation_data = CirculationData( + data_source=licensepool.data_source, + primary_identifier=identifier, + licenses_owned=licenses_owned, + licenses_available=licenses_available, + licenses_reserved=0, + patrons_in_hold_queue=0, + ) + + circulation_data.apply(self._db, self.collection) diff --git a/api/odl2.py b/api/odl2.py index 2a03f3d357..ca16aeb9eb 100644 --- a/api/odl2.py +++ b/api/odl2.py @@ -1,11 +1,12 @@ import json import logging +from core import util from contextlib2 import contextmanager from flask_babel import lazy_gettext as _ from webpub_manifest_parser.opds2.registry import OPDS2LinkRelationsRegistry -from api.odl import ODLAPI +from api.odl import ODLAPI, ODLExpiredItemsReaper from core.metadata_layer import FormatData, LicenseData from core.model import DeliveryMechanism, Edition, MediaTypes, RightsStatus from core.model.configuration import ( @@ -239,6 +240,13 @@ def _extract_publication_metadata(self, feed, publication, data_source_name): expires = license.metadata.terms.expires concurrent_checkouts = license.metadata.terms.concurrency + if expires: + expires = util.datetime_helpers.to_utc(expires) + now = util.datetime_helpers.utc_now() + + if expires <= now: + continue + licenses_owned += int(concurrent_checkouts or 0) licenses_available += int(available_checkouts or 0) @@ -270,3 +278,11 @@ class ODL2ImportMonitor(OPDS2ImportMonitor): PROTOCOL = ODL2Importer.NAME SERVICE_NAME = "ODL 2.x Import Monitor" + + +class ODL2ExpiredItemsReaper(ODLExpiredItemsReaper): + """Check for books that are in the local collection but have left our + Overdrive collection. + """ + SERVICE_NAME = "ODL 2 Expired Items Reaper" + PROTOCOL = ODL2Importer.NAME diff --git a/bin/odl2_reaper b/bin/odl2_reaper new file mode 100755 index 0000000000..b74b5fbf11 --- /dev/null +++ b/bin/odl2_reaper @@ -0,0 +1,10 @@ +#!/usr/bin/env python +"""Monitor the Overdrive collections by looking for books with lost licenses.""" +import os +import sys +bin_dir = os.path.split(__file__)[0] +package_dir = os.path.join(bin_dir, "..") +sys.path.append(os.path.abspath(package_dir)) +from core.scripts import RunCollectionMonitorScript +from api.odl2 import ODL2ExpiredItemsReaper +RunCollectionMonitorScript(ODL2ExpiredItemsReaper).run() diff --git a/bin/odl_reaper b/bin/odl_reaper new file mode 100755 index 0000000000..18828c03f6 --- /dev/null +++ b/bin/odl_reaper @@ -0,0 +1,10 @@ +#!/usr/bin/env python +"""Monitor the Overdrive collections by looking for books with lost licenses.""" +import os +import sys +bin_dir = os.path.split(__file__)[0] +package_dir = os.path.join(bin_dir, "..") +sys.path.append(os.path.abspath(package_dir)) +from core.scripts import RunCollectionMonitorScript +from api.odl import ODLExpiredItemsReaper +RunCollectionMonitorScript(ODLExpiredItemsReaper).run() diff --git a/tests/files/odl/single_license.opds b/tests/files/odl/single_license.opds new file mode 100644 index 0000000000..e89a17d610 --- /dev/null +++ b/tests/files/odl/single_license.opds @@ -0,0 +1,79 @@ + + + https://market.feedbooks.com/api/libraries/harvest.atom + Feedbooks + 2021-08-16T09:07:14Z + /favicon.ico + + Feedbooks + https://market.feedbooks.com + support@feedbooks.zendesk.com + + + 481 + 100 + + + The Golden State + https://www.feedbooks.com/item/2895246 + urn:ISBN:9780374718060 + urn:ISBN:9780374164836 + + Lydia Kiesling + https://market.feedbooks.com/store/browse/recent.atom?author_id=954566&lang=en + + 2018-08-12T00:16:43Z + 2020-05-21T11:13:26Z + en + Mcd + 2018-09-03 + NATIONAL BOOK FOUNDATION 5 UNDER 35 PICK. LONGLISTED FOR THE CENTER FOR FICTION'S FIRST NOVEL PRIZE. Named one of the Best Books of 2018 by NPR, Bookforum and Bustle. One of Entertainment Weekly's 10 Best Debut Novels of 2018. An Amazon Best Book of the Month and named a fall read by Buzzfeed, Nylon, Entertainment Weekly, Elle, Vanity Fair, Vulture, Refinery29 and Mind Body GreenA gorgeous, raw debut novel about a young woman braving the ups and downs of motherhood in a fractured AmericaIn Lydia Kiesling's razor-sharp debut novel, The Golden State, we accompany Daphne, a young mother on the edge of a breakdown, as she flees her sensible but strained life in San Francisco for the high desert of Altavista with her toddler, Honey. Bucking under the weight of being a single parent--her Turkish husband is unable to return to the United States because of a "processing error"--Daphne takes refuge in a mobile home left to her by her grandparents in hopes that the quiet will bring clarity. But clarity proves elusive. Over the next ten days Daphne is anxious, she behaves a little erratically, she drinks too much. She wanders the town looking for anyone and anything to punctuate the long hours alone with the baby. Among others, she meets Cindy, a neighbor who is active in a secessionist movement, and befriends the elderly Alice, who has traveled to Altavista as she approaches the end of her life. When her relationships with these women culminate in a dangerous standoff, Daphne must reconcile her inner narrative with the reality of a deeply divided world. Keenly observed, bristling with humor, and set against the beauty of a little-known part of California, The Golden State is about class and cultural breakdowns, and desperate attempts to bridge old and new worlds. But more than anything, it is about motherhood: its voracious worry, frequent tedium, and enthralling, wondrous love. + 4 MB + + + + + + + + + 40.00 + + + + + + + + + urn:uuid:c981d61e-26f4-4070-aaa8-83df952cf61b + application/epub+zip + text/html + http://www.cantook.net/ + 40.00 + cant-2461538-24501117858552614-libraries + 2020-03-02T20:20:17+01:00 + + {{expires}} + 1 + 5097600 + + + application/vnd.adobe.adept+xml + 6 + true + false + false + + + application/vnd.readium.lcp.license.v1.0+json + 6 + true + false + false + + + + + + \ No newline at end of file diff --git a/tests/test_odl.py b/tests/test_odl.py index a3b19deafb..9dd9e05a89 100644 --- a/tests/test_odl.py +++ b/tests/test_odl.py @@ -1,42 +1,41 @@ -import pytest -import os -import json import datetime -import dateutil -import re +import json +import os import urllib.parse -from pdb import set_trace -from core.testing import DatabaseTest -from core.metadata_layer import TimestampData + +import dateutil +import pytest +from dateutil.tz import tzoffset +from freezegun import freeze_time +from mock import MagicMock + +from api.circulation_exceptions import * +from api.odl import ( + ODLImporter, + ODLHoldReaper, + MockODLAPI, + SharedODLAPI, + MockSharedODLAPI, + SharedODLImporter, + ODLExpiredItemsReaper) from core.model import ( Collection, ConfigurationSetting, - Credential, DataSource, DeliveryMechanism, Edition, ExternalIntegration, Hold, Hyperlink, - Identifier, Loan, MediaTypes, Representation, - RightsStatus, - get_one, -) -from api.odl import ( - ODLImporter, - ODLHoldReaper, - MockODLAPI, - SharedODLAPI, - MockSharedODLAPI, - SharedODLImporter, -) -from api.circulation_exceptions import * + RightsStatus) +from core.scripts import RunCollectionMonitorScript +from core.testing import DatabaseTest +from core.util import datetime_helpers from core.util.datetime_helpers import ( datetime_utc, - strptime_utc, utc_now, ) from core.util.http import ( @@ -45,6 +44,7 @@ ) from core.util.string_helpers import base64 + class BaseODLTest(object): base_path = os.path.split(__file__)[0] resource_path = os.path.join(base_path, "files", "odl") @@ -52,7 +52,8 @@ class BaseODLTest(object): @classmethod def get_data(cls, filename): path = os.path.join(cls.resource_path, filename) - return open(path, "rb").read() + return open(path, "r").read() + class TestODLAPI(DatabaseTest, BaseODLTest): @@ -1324,6 +1325,7 @@ def do_get(url, headers): self._db, collection=collection, metadata_client=metadata_client, http_get=do_get, + skip_expired_items=False ) imported_editions, imported_pools, imported_works, failures = ( @@ -1517,7 +1519,6 @@ def test_run_once(self): assert None == progress.finish - class TestSharedODLAPI(DatabaseTest, BaseODLTest): def setup_method(self): @@ -1847,6 +1848,7 @@ def test_patron_activity_remote_integration_exception(self): pytest.raises(RemoteIntegrationException, self.api.patron_activity, self.patron, "pin") assert [hold.external_identifier] == self.api.requests[1:] + class TestSharedODLImporter(DatabaseTest, BaseODLTest): def test_get_fulfill_url(self): @@ -1929,3 +1931,142 @@ def canonicalize_author_name(self, identifier, working_display_name): assert 'http://localhost:6500/AL/works/URI/http://www.feedbooks.com/item/1946289/borrow' == borrow_link.resource.url +class TestODLExpiredItemsReaper(DatabaseTest, BaseODLTest): + ODL_FEED_FILENAME_WITH_SINGLE_ODL_LICENSE = "single_license.opds" + ODL_LICENSE_EXPIRATION_TIME_PLACEHOLDER = "{{expires}}" + + def _get_test_feed_with_single_odl_license(self, expires): + """Get the feed with a single ODL license with the specific expiration date. + + :param expires: Expiration date of the ODL license + :type expires: datetime.datetime + + :return: Test ODL feed with a single ODL license with the specific expiration date + :rtype: str + """ + feed = self.get_data("single_license.opds") + feed = feed.replace(self.ODL_LICENSE_EXPIRATION_TIME_PLACEHOLDER, expires.isoformat()) + + return feed + + def _import_test_feed_with_single_odl_license(self, expires): + """Import the test ODL feed with a single ODL license with the specific expiration date. + + :param expires: Expiration date of the ODL license + :type expires: datetime.datetime + + :return: 3-tuple containing imported editions, license pools and works + :rtype: Tuple[List[Edition], List[LicensePool], List[Work]] + """ + feed = self._get_test_feed_with_single_odl_license(expires) + data_source = DataSource.lookup(self._db, "Feedbooks", autocreate=True) + collection = MockODLAPI.mock_collection(self._db) + collection.external_integration.set_setting( + Collection.DATA_SOURCE_NAME_SETTING, + data_source.name + ) + license_status = { + "checkouts": { + "available": 1 + } + } + license_status_response = MagicMock(return_value=(200, {}, json.dumps(license_status))) + importer = ODLImporter( + self._db, + collection=collection, + http_get=license_status_response, + ) + + imported_editions, imported_pools, imported_works, _ = ( + importer.import_from_feed(feed) + ) + + return imported_editions, imported_pools, imported_works + + @freeze_time("2021-01-01T00:00:00+00:00") + def test_odl_importer_skips_expired_licenses(self): + """Ensure ODLImporter skips expired licenses + and does not count them in the total number of available licenses.""" + # 1.1. Import the test feed with an expired ODL license. + # The license expires 2021-01-01T00:01:00+01:00 that equals to 2010-01-01T00:00:00+00:00, the current time. + # It means the license had already expired at the time of the import. + license_expiration_date = datetime.datetime(2021, 1, 1, 1, 0, 0, tzinfo=tzoffset(None, 3600)) + imported_editions, imported_pools, imported_works = self._import_test_feed_with_single_odl_license( + license_expiration_date + ) + + # Commit to expire the SQLAlchemy cache. + self._db.commit() + + # 1.2. Ensure that the license pool was successfully created but it does not have any available licenses. + assert len(imported_pools) == 1 + + [imported_pool] = imported_pools + assert imported_pool.licenses_owned == 0 + assert imported_pool.licenses_available == 0 + assert len(imported_pool.licenses) == 0 + + @freeze_time("2021-01-01T00:00:00+00:00") + def test_odl_reaper_removes_expired_licenses(self): + """Ensure ODLExpiredItemsReaper removes expired licenses.""" + patron = self._patron() + + # 1.1. Import the test feed with an ODL license that is still valid. + # The license will be valid for one more day since this very moment. + license_expiration_date = datetime_helpers.utc_now() + datetime.timedelta(days=1) + imported_editions, imported_pools, imported_works = self._import_test_feed_with_single_odl_license( + license_expiration_date + ) + + # Commit to expire the SQLAlchemy cache. + self._db.commit() + + # 1.2. Ensure that there is a license pool with available license. + assert len(imported_pools) == 1 + + [imported_pool] = imported_pools + assert imported_pool.licenses_owned == 1 + assert imported_pool.licenses_available == 1 + + assert len(imported_pool.licenses) == 1 + [license] = imported_pool.licenses + assert license.expires == license_expiration_date + + # 2. Create a loan to ensure that the licence with active loan can also be removed (hidden). + loan, _ = license.loan_to(patron) + + # 3.1. Run ODLExpiredItemsReaper. This time nothing should happen since the license is still valid. + script = RunCollectionMonitorScript(ODLExpiredItemsReaper, _db=self._db, cmd_args=["Test ODL Collection"]) + script.run() + + # Commit to expire the SQLAlchemy cache. + self._db.commit() + + # 3.2. Ensure that availability of the license pool didn't change. + assert imported_pool.licenses_owned == 1 + assert imported_pool.licenses_available == 1 + + # 4. Expire the license. + # Set the expiration date to 2021-01-01T00:01:00+01:00 + # that equals to 2010-01-01T00:00:00+00:00, the current time. + license.expires = datetime.datetime(2021, 1, 1, 1, 0, 0, tzinfo=tzoffset(None, 3600)) + + # 5.1. Run ODLExpiredItemsReaper again. This time it should remove the expired license. + script.run() + + # Commit to expire the SQLAlchemy cache. + self._db.commit() + + # 5.2. Ensure that availability of the license pool was updated and now it doesn't have any available licenses. + assert imported_pool.licenses_owned == 0 + assert imported_pool.licenses_available == 0 + + # 6.1. Run ODLExpiredItemsReaper again to ensure that number of licenses won't become negative. + script.run() + + # Commit to expire the SQLAlchemy cache. + self._db.commit() + + # 6.2. Ensure that number of licenses is still 0. + assert imported_pool.licenses_owned == 0 + assert imported_pool.licenses_available == 0