From a205ec8ffc8d7cd8731f840ef6891c074b8d4e68 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Wed, 31 Jan 2018 09:51:34 -0500 Subject: [PATCH 1/3] Determine the OPDS import URL through an overridable method. --- model.py | 2 ++ opds_import.py | 10 +++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/model.py b/model.py index feaf80174..b7591cebc 100644 --- a/model.py +++ b/model.py @@ -10039,6 +10039,8 @@ class ExternalIntegration(Base, HasFullTableCache): ONE_CLICK = RB_DIGITAL OPDS_FOR_DISTRIBUTORS = u'OPDS for Distributors' ENKI = DataSource.ENKI + FEEDBOOKS = DataSource.FEEDBOOKS + UNGLUEIT = DataSource.UNGLUEIT # These protocols are only used on the Content Server when mirroring # content from a given directory or directly from Project diff --git a/opds_import.py b/opds_import.py index 023d8ad2b..7fdb489d8 100644 --- a/opds_import.py +++ b/opds_import.py @@ -1462,7 +1462,7 @@ def __init__(self, _db, collection, import_class, "Collection %s has no associated data source." % collection.name ) - self.feed_url = collection.external_account_id + self.feed_url = self.opds_url(collection) self.force_reimport = force_reimport self.importer = import_class( _db, collection=collection, **import_class_kwargs @@ -1487,6 +1487,14 @@ def _get(self, url, headers): response = HTTP.get_with_timeout(url, headers=headers, **kwargs) return response.status_code, response.headers, response.content + def opds_url(self, collection): + """Returns the OPDS import URL for the given collection. + + By default, this URL is stored as the external account ID, but + subclasses may override this. + """ + return collection.external_account_id + def feed_contains_new_data(self, feed): """Does the given feed contain any entries that haven't been imported yet? 
From cb3245e4291efd79e829fad8d19e6b7c8d9b3a28 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Wed, 31 Jan 2018 11:49:08 -0500 Subject: [PATCH 2/3] Added the ability for a subclass of OPDSImportMonitor to customize behavior. --- model.py | 1 - opds_import.py | 12 ++++++++++-- scripts.py | 15 +++++++++++---- tests/test_opds_import.py | 14 ++++++++++++++ 4 files changed, 35 insertions(+), 7 deletions(-) diff --git a/model.py b/model.py index b7591cebc..760bf0b50 100644 --- a/model.py +++ b/model.py @@ -10040,7 +10040,6 @@ class ExternalIntegration(Base, HasFullTableCache): OPDS_FOR_DISTRIBUTORS = u'OPDS for Distributors' ENKI = DataSource.ENKI FEEDBOOKS = DataSource.FEEDBOOKS - UNGLUEIT = DataSource.UNGLUEIT # These protocols are only used on the Content Server when mirroring # content from a given directory or directly from Project diff --git a/opds_import.py b/opds_import.py index 7fdb489d8..02be1a08d 100644 --- a/opds_import.py +++ b/opds_import.py @@ -1456,7 +1456,7 @@ def __init__(self, _db, collection, import_class, ) ) - data_source = collection.data_source + data_source = self.data_source(collection) if not data_source: raise ValueError( "Collection %s has no associated data source." % collection.name @@ -1495,6 +1495,14 @@ def opds_url(self, collection): """ return collection.external_account_id + def data_source(self, collection): + """Returns the data source name for the given collection. + + By default, this data source is stored as a setting on the collection, but + subclasses may hard-code it. + """ + return collection.data_source + def feed_contains_new_data(self, feed): """Does the given feed contain any entries that haven't been imported yet? 
@@ -1618,7 +1626,7 @@ def import_one_feed(self, feed): imported_editions, pools, works, failures = self.importer.import_from_feed( feed, even_if_no_author=True, immediately_presentation_ready = True, - feed_url=self.collection.external_account_id + feed_url=self.opds_url(self.collection) ) # Create CoverageRecords for the successful imports. diff --git a/scripts.py b/scripts.py index f60a0ba58..37d72dd7f 100644 --- a/scripts.py +++ b/scripts.py @@ -1603,7 +1603,14 @@ class OPDSImportScript(CollectionInputScript): IMPORTER_CLASS = OPDSImporter MONITOR_CLASS = OPDSImportMonitor PROTOCOL = ExternalIntegration.OPDS_IMPORT - + + def __init__(self, _db=None, importer_class=None, monitor_class=None, + protocol=None, *args, **kwargs): + super(OPDSImportScript, self).__init__(_db, *args, **kwargs) + self.importer_class = importer_class or self.IMPORTER_CLASS + self.monitor_class = monitor_class or self.MONITOR_CLASS + self.protocol = protocol or self.PROTOCOL + @classmethod def arg_parser(cls): parser = CollectionInputScript.arg_parser() @@ -1616,13 +1623,13 @@ def arg_parser(cls): def do_run(self, cmd_args=None): parsed = self.parse_command_line(self._db, cmd_args=cmd_args) - collections = parsed.collections or Collection.by_protocol(self._db, self.PROTOCOL) + collections = parsed.collections or Collection.by_protocol(self._db, self.protocol) for collection in collections: self.run_monitor(collection, force=parsed.force) def run_monitor(self, collection, force=None): - monitor = self.MONITOR_CLASS( - self._db, collection, import_class=self.IMPORTER_CLASS, + monitor = self.monitor_class( + self._db, collection, import_class=self.importer_class, force_reimport=force ) monitor.run() diff --git a/tests/test_opds_import.py b/tests/test_opds_import.py index 19e7627c0..8c26b3403 100644 --- a/tests/test_opds_import.py +++ b/tests/test_opds_import.py @@ -1559,6 +1559,20 @@ def test_constructor(self): self._default_collection, OPDSImporter, ) + + def test_hook_methods(self): + """By 
default, the OPDS URL and data source used by the importer + come from the collection configuration. + """ + monitor = OPDSImportMonitor( + self._db, self._default_collection, + import_class=OPDSImporter, + ) + eq_(self._default_collection.external_account_id, + monitor.opds_url(self._default_collection)) + + eq_(self._default_collection.data_source, + monitor.data_source(self._default_collection)) def test_feed_contains_new_data(self): feed = self.content_server_mini_feed From ab63ecdc5e0a3f4d25d1271babb6f7c0eef40f3e Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Wed, 31 Jan 2018 12:03:28 -0500 Subject: [PATCH 3/3] Added missing migration script for mirror_integration_id. --- migration/20180129-collection-mirror-integration.sql | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 migration/20180129-collection-mirror-integration.sql diff --git a/migration/20180129-collection-mirror-integration.sql b/migration/20180129-collection-mirror-integration.sql new file mode 100644 index 000000000..c82e31699 --- /dev/null +++ b/migration/20180129-collection-mirror-integration.sql @@ -0,0 +1,4 @@ +-- Add collections.mirror_integration_id, a foreign key +-- against externalintegrations.id +alter table collections add column mirror_integration_id integer; +alter table collections add constraint collections_mirror_integration_id_fkey FOREIGN KEY (mirror_integration_id) REFERENCES externalintegrations(id);