diff --git a/analytics.py b/analytics.py index 84c6f01ae..03a1b22d4 100644 --- a/analytics.py +++ b/analytics.py @@ -1,71 +1,41 @@ +from nose.tools import set_trace import importlib import contextlib import datetime -from config import Configuration +from collections import defaultdict +from model import ExternalIntegration +from config import CannotLoadConfiguration class Analytics(object): - __instance = None - - if '.' in __module__: - # We are operating in an application that imports this product - # as a package (probably called 'core'). The module name of - # the analytics provider should be scoped to the name of the - # package, i.e. 'core.local_analytics_provider'. - package_name = __module__[:__module__.rfind('.')+1] - else: - # This application is not imported as a package, probably - # because we're running its unit tests. - package_name = '' - - DEFAULT_PROVIDERS = [package_name + "local_analytics_provider"] - - @classmethod - def instance(cls): - if not cls.__instance: - config = Configuration.instance - providers = cls.load_providers_from_config(config) - cls.initialize(providers, config) - return cls.__instance - - @classmethod - def initialize(cls, providers, config): - if not providers: - cls.__instance = cls() - return cls.__instance - if isinstance(providers, basestring): - providers = [providers] - analytics_providers = [] - for provider_string in providers: - provider_module = importlib.import_module(provider_string) - provider_class = getattr(provider_module, "Provider") - analytics_providers.append(provider_class.from_config(config)) - cls.__instance = cls(analytics_providers) - return cls.__instance - - def __init__(self, providers=[]): - self.providers = providers - - @classmethod - def collect_event(cls, _db, license_pool, event_type, time=None, **kwargs): + def __init__(self, _db): + self.sitewide_providers = [] + self.library_providers = defaultdict(list) + self.initialization_exceptions = {} + + # Find a list of all the ExternalIntegrations set up with a + # goal of analytics. + integrations = _db.query(ExternalIntegration).filter(ExternalIntegration.goal==ExternalIntegration.ANALYTICS_GOAL) + # Turn each integration into an analytics provider. + for integration in integrations: + try: + provider_module = importlib.import_module(integration.protocol) + provider_class = getattr(provider_module, "Provider", None) + if provider_class: + if not integration.libraries: + provider = provider_class(integration) + self.sitewide_providers.append(provider) + else: + for library in integration.libraries: + provider = provider_class(integration, library) + self.library_providers[library.id].append(provider) + else: + self.initialization_exceptions[integration.id] = "Module %s does not have Provider defined." % integration.protocol + except (ImportError, CannotLoadConfiguration), e: + self.initialization_exceptions[integration.id] = e + + def collect_event(self, library, license_pool, event_type, time=None, **kwargs): if not time: time = datetime.datetime.utcnow() - for provider in cls.instance().providers: - provider.collect_event(_db, license_pool, event_type, time, **kwargs) - - @classmethod - def load_providers_from_config(cls, config): - policies = config.get(Configuration.POLICIES, {}) - return policies.get(Configuration.ANALYTICS_POLICY, cls.DEFAULT_PROVIDERS) - - -@contextlib.contextmanager -def temp_analytics(providers, config): - """A context manager to temporarily replace the analytics providers - used by a test. - """ - old_instance = Analytics._Analytics__instance - Analytics.initialize(providers, config) - yield - Analytics._Analytics__instance = old_instance - + for provider in (self.sitewide_providers + self.library_providers[library.id]): + provider.collect_event(library, license_pool, event_type, time, **kwargs) diff --git a/config.py b/config.py index 5e8abf0b0..1b781b66f 100644 --- a/config.py +++ b/config.py @@ -130,7 +130,11 @@ class Configuration(object): }, { "key": GROUPED_MAX_AGE_POLICY, - "label": _("Cache time for grouped OPDS feeds") + "label": _("Cache time for grouped OPDS feeds"), + }, + { + "key": BASE_URL_KEY, + "label": _("Base url of the application"), }, ] diff --git a/coverage.py b/coverage.py index 22e20ac11..430e0511e 100644 --- a/coverage.py +++ b/coverage.py @@ -445,7 +445,7 @@ def __init__(self, _db, collection=None, input_identifiers=None, self.collection_id = collection.id self.input_identifiers = input_identifiers self.replacement_policy = ( - replacement_policy or self._default_replacement_policy + replacement_policy or self._default_replacement_policy(_db) ) if not self.DATA_SOURCE_NAME: @@ -457,8 +457,7 @@ def __init__(self, _db, collection=None, input_identifiers=None, # if INPUT_IDENTIFIER_TYPES is not set properly. self.input_identifier_types = self._input_identifier_types() - @property - def _default_replacement_policy(self): + def _default_replacement_policy(self, _db): """Unless told otherwise, assume that we are getting this data from a reliable metadata source. """ @@ -743,13 +742,12 @@ def __init__(self, collection, **kwargs): _db, collection, **kwargs ) - @property - def _default_replacement_policy(self): + def _default_replacement_policy(self, _db): """Unless told otherwise, assume that we are getting this data from a reliable source of both metadata and circulation information. """ - return ReplacementPolicy.from_license_source() + return ReplacementPolicy.from_license_source(_db) @classmethod def all(cls, _db, **kwargs): diff --git a/local_analytics_provider.py b/local_analytics_provider.py index afd85b81b..aee84a0ee 100644 --- a/local_analytics_provider.py +++ b/local_analytics_provider.py @@ -1,12 +1,19 @@ +from flask.ext.babel import lazy_gettext as _ +from model import Session, CirculationEvent + class LocalAnalyticsProvider(object): - @classmethod - def from_config(cls, config): - return cls() + NAME = _("Local Analytics") + + DESCRIPTION = _("Store analytics events in the 'circulationevents' database table.") + + def __init__(self, integration): + self.integration_id = integration.id - def collect_event(self, _db, license_pool, event_type, time, + def collect_event(self, library, license_pool, event_type, time, old_value=None, new_value=None, **kwargs): - from model import CirculationEvent + _db = Session.object_session(library) + CirculationEvent.log( _db, license_pool, event_type, old_value, new_value, start=time) -Provider = LocalAnalyticsProvider \ No newline at end of file +Provider = LocalAnalyticsProvider diff --git a/metadata_layer.py b/metadata_layer.py index e8b6bc7c7..2a6e15311 100644 --- a/metadata_layer.py +++ b/metadata_layer.py @@ -43,6 +43,7 @@ Work, ) from classifier import NO_VALUE, NO_NUMBER +from analytics import Analytics class ReplacementPolicy(object): """How serious should we be about overwriting old metadata with @@ -59,6 +60,7 @@ def __init__( link_content=False, mirror=None, content_modifier=None, + analytics=None, http_get=None, even_if_not_apparently_updated=False, presentation_calculation_policy=None @@ -73,6 +75,7 @@ def __init__( self.even_if_not_apparently_updated = even_if_not_apparently_updated self.mirror = mirror self.content_modifier = content_modifier + self.analytics = analytics self.http_get = http_get self.presentation_calculation_policy = ( presentation_calculation_policy or @@ -80,11 +83,12 @@ def __init__( ) @classmethod - def from_license_source(self, **args): + def from_license_source(self, _db, **args): """When gathering data from the license source, overwrite all old data from this source with new data from the same source. Also overwrite an old rights status with an updated status and update - the list of available formats. + the list of available formats. Log availability changes to the + configured analytics services. """ return ReplacementPolicy( identifiers=True, @@ -93,6 +97,7 @@ def from_license_source(self, **args): links=True, rights=True, formats=True, + analytics=Analytics(_db), **args ) @@ -802,11 +807,14 @@ def primary_identifier(self, _db): self.primary_identifier_obj = obj return self.primary_identifier_obj - def license_pool(self, _db, collection): + def license_pool(self, _db, collection, analytics=None): """Find or create a LicensePool object for this CirculationData. :param collection: The LicensePool object will be associated with the given Collection. + + :param analytics: If the LicensePool is newly created, the event + will be tracked with this. """ if not collection: raise ValueError( @@ -830,17 +838,15 @@ def license_pool(self, _db, collection): license_pool.open_access = self.has_open_access_link license_pool.availability_time = self.last_checked # This is our first time seeing this LicensePool. Log its - # occurence as a separate event. - event = get_one_or_create( - _db, CirculationEvent, - type=CirculationEvent.DISTRIBUTOR_TITLE_ADD, - license_pool=license_pool, - create_method_kwargs=dict( - start=self.last_checked, - delta=1, - end=self.last_checked, - ) - ) + # occurrence as a separate analytics event. + if analytics: + for library in collection.libraries: + analytics.collect_event( + library, license_pool, + CirculationEvent.DISTRIBUTOR_TITLE_ADD, + self.last_checked, + old_value=0, new_value=1, + ) license_pool.last_checked = self.last_checked return license_pool, is_new @@ -900,7 +906,7 @@ def apply(self, _db, collection, replace=None): pool = None if collection: - pool, ignore = self.license_pool(_db, collection) + pool, ignore = self.license_pool(_db, collection, replace.analytics) data_source = self.data_source(_db) identifier = self.primary_identifier(_db) @@ -978,11 +984,13 @@ def apply(self, _db, collection, replace=None): if pool and self._availability_needs_update(pool): # Update availabily information. This may result in # the issuance of additional circulation events. + analytics = Analytics(_db) changed_availability = pool.update_availability( new_licenses_owned=self.licenses_owned, new_licenses_available=self.licenses_available, new_licenses_reserved=self.licenses_reserved, new_patrons_in_hold_queue=self.patrons_in_hold_queue, + analytics=replace.analytics, as_of=self.last_checked ) diff --git a/mock_analytics_provider.py b/mock_analytics_provider.py index d72b9e092..8270c4f0a 100644 --- a/mock_analytics_provider.py +++ b/mock_analytics_provider.py @@ -1,18 +1,15 @@ class MockAnalyticsProvider(object): """A mock analytics provider that keeps track of how many times it's called.""" - @classmethod - def from_config(cls, config): - return cls(config.get('option')) - - def __init__(self, option=None): - self.option = option + def __init__(self, integration=None, library=None): self.count = 0 self.event = None + if integration: + self.url = integration.url - def collect_event(self, _db, lp, event_type, time, **kwargs): + def collect_event(self, library, lp, event_type, time=None, **kwargs): self.count = self.count + 1 self.event_type = event_type self.time = time -Provider = MockAnalyticsProvider \ No newline at end of file +Provider = MockAnalyticsProvider diff --git a/model.py b/model.py index 52202d7c8..13c750c84 100644 --- a/model.py +++ b/model.py @@ -136,7 +136,6 @@ INT4RANGE, ) from s3 import S3Uploader -from analytics import Analytics DEBUG = False @@ -6357,7 +6356,8 @@ def needs_update(self): def update_availability( self, new_licenses_owned, new_licenses_available, - new_licenses_reserved, new_patrons_in_hold_queue, as_of=None): + new_licenses_reserved, new_patrons_in_hold_queue, + analytics=None, as_of=None): """Update the LicensePool with new availability information. Log the implied changes as CirculationEvents. """ @@ -6395,9 +6395,11 @@ def update_availability( if not event_name: continue - Analytics.collect_event( - _db, self, event_name, as_of, - old_value=old_value, new_value=new_value) + if analytics: + for library in self.collection.libraries: + analytics.collect_event( + library, self, event_name, as_of, + old_value=old_value, new_value=new_value) # Update the license pool with the latest information. any_data = False diff --git a/tests/test_analytics.py b/tests/test_analytics.py index 116ce3c02..1dd741003 100644 --- a/tests/test_analytics.py +++ b/tests/test_analytics.py @@ -1,5 +1,6 @@ from nose.tools import ( eq_, + set_trace, ) from config import ( Configuration, @@ -9,42 +10,104 @@ from mock_analytics_provider import MockAnalyticsProvider from local_analytics_provider import LocalAnalyticsProvider from . import DatabaseTest -from model import CirculationEvent +from model import ( + CirculationEvent, + ExternalIntegration, + Library, + create, +) import json class TestAnalytics(DatabaseTest): def test_initialize(self): - # supports multiple analytics providers - config = { "option": "value" } - analytics = Analytics.initialize(["mock_analytics_provider"], config) - assert isinstance(analytics.providers[0], MockAnalyticsProvider) - eq_("value", analytics.providers[0].option) + # supports multiple analytics providers, site-wide or with libraries + + # Two site-wide integrations + mock_integration, ignore = create( + self._db, ExternalIntegration, + goal=ExternalIntegration.ANALYTICS_GOAL, + protocol="mock_analytics_provider" + ) + mock_integration.url = self._str + local_integration, ignore = create( + self._db, ExternalIntegration, + goal=ExternalIntegration.ANALYTICS_GOAL, + protocol="local_analytics_provider" + ) + + # A broken integration + missing_integration, ignore = create( + self._db, ExternalIntegration, + goal=ExternalIntegration.ANALYTICS_GOAL, + protocol="missing_provider" + ) + + # Two library-specific integrations + l1, ignore = create(self._db, Library, short_name="L1") + l2, ignore = create(self._db, Library, short_name="L2") + + library_integration1, ignore = create( + self._db, ExternalIntegration, + goal=ExternalIntegration.ANALYTICS_GOAL, + protocol="mock_analytics_provider" + ) + library_integration1.libraries += [l1, l2] + + library_integration2, ignore = create( + self._db, ExternalIntegration, + goal=ExternalIntegration.ANALYTICS_GOAL, + protocol="mock_analytics_provider" + ) + library_integration2.libraries += [l2] + + analytics = Analytics(self._db) + eq_(2, len(analytics.sitewide_providers)) + assert isinstance(analytics.sitewide_providers[0], MockAnalyticsProvider) + eq_(mock_integration.url, analytics.sitewide_providers[0].url) + assert isinstance(analytics.sitewide_providers[1], LocalAnalyticsProvider) + assert missing_integration.id in analytics.initialization_exceptions + + eq_(1, len(analytics.library_providers[l1.id])) + assert isinstance(analytics.library_providers[l1.id][0], MockAnalyticsProvider) + + eq_(2, len(analytics.library_providers[l2.id])) + for provider in analytics.library_providers[l2.id]: + assert isinstance(provider, MockAnalyticsProvider) def test_collect_event(self): - config = { - Configuration.POLICIES: { - Configuration.ANALYTICS_POLICY: ["mock_analytics_provider"] - }, - "option": "value" - } - with temp_config(config) as config: - work = self._work(title="title", with_license_pool=True) - [lp] = work.license_pools - Analytics.collect_event(self._db, lp, CirculationEvent.DISTRIBUTOR_CHECKIN, None) - mock = Analytics.instance().providers[0] - eq_(1, mock.count) - - def test_load_providers_from_config(self): - config = { - Configuration.POLICIES: { - Configuration.ANALYTICS_POLICY: ["mock_analytics_provider"] - }, - "option": "value" - } - providers = Analytics.load_providers_from_config(config) - eq_("mock_analytics_provider", providers[0]) - - def test_load_providers_from_config_without_analytics(self): - providers = Analytics.load_providers_from_config({}) - eq_("local_analytics_provider", providers[0]) + sitewide_integration, ignore = create( + self._db, ExternalIntegration, + goal=ExternalIntegration.ANALYTICS_GOAL, + protocol="mock_analytics_provider" + ) + + library, ignore = create(self._db, Library, short_name="library") + library_integration, ignore = create( + self._db, ExternalIntegration, + goal=ExternalIntegration.ANALYTICS_GOAL, + protocol="mock_analytics_provider", + ) + library_integration.libraries += [library] + + work = self._work(title="title", with_license_pool=True) + [lp] = work.license_pools + analytics = Analytics(self._db) + sitewide_provider = analytics.sitewide_providers[0] + library_provider = analytics.library_providers[library.id][0] + + analytics.collect_event(self._default_library, lp, CirculationEvent.DISTRIBUTOR_CHECKIN, None) + + # The sitewide provider was called. + eq_(1, sitewide_provider.count) + eq_(CirculationEvent.DISTRIBUTOR_CHECKIN, sitewide_provider.event_type) + + # The library provider wasn't called, since the event was for a different library. + eq_(0, library_provider.count) + + analytics.collect_event(library, lp, CirculationEvent.DISTRIBUTOR_CHECKIN, None) + + # Now both providers were called, since the event was for the library provider's library. + eq_(2, sitewide_provider.count) + eq_(1, library_provider.count) + eq_(CirculationEvent.DISTRIBUTOR_CHECKIN, library_provider.event_type) diff --git a/tests/test_coverage.py b/tests/test_coverage.py index 74f50ae58..e63749a69 100644 --- a/tests/test_coverage.py +++ b/tests/test_coverage.py @@ -473,7 +473,7 @@ def test_replacement_policy(self): eq_(True, provider.replacement_policy.identifiers) eq_(False, provider.replacement_policy.formats) - policy = ReplacementPolicy.from_license_source() + policy = ReplacementPolicy.from_license_source(self._db) provider = AlwaysSuccessfulCoverageProvider( self._db, replacement_policy=policy ) diff --git a/tests/test_local_analytics_provider.py b/tests/test_local_analytics_provider.py index 98daf7e8b..f5951d138 100644 --- a/tests/test_local_analytics_provider.py +++ b/tests/test_local_analytics_provider.py @@ -3,13 +3,21 @@ ) from local_analytics_provider import LocalAnalyticsProvider from . import DatabaseTest -from model import CirculationEvent +from model import ( + CirculationEvent, + ExternalIntegration, + create, +) import datetime class TestLocalAnalyticsProvider(DatabaseTest): def test_collect_event(self): - la = LocalAnalyticsProvider() + integration, ignore = create( + self._db, ExternalIntegration, + goal=ExternalIntegration.ANALYTICS_GOAL, + protocol="core.local_analytics_provider") + la = LocalAnalyticsProvider(integration) work = self._work( title="title", authors="author", fiction=True, audience="audience", language="lang", @@ -18,7 +26,7 @@ def test_collect_event(self): [lp] = work.license_pools now = datetime.datetime.utcnow() la.collect_event( - self._db, lp, CirculationEvent.DISTRIBUTOR_CHECKIN, now, + self._default_library, lp, CirculationEvent.DISTRIBUTOR_CHECKIN, now, old_value=None, new_value=None) [event] = self._db \ .query(CirculationEvent) \ diff --git a/tests/test_model.py b/tests/test_model.py index b669ff349..d495ffa89 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -101,10 +101,6 @@ DummyHTTPClient, ) -from analytics import ( - Analytics, - temp_analytics -) from mock_analytics_provider import MockAnalyticsProvider class TestDatabaseInterface(DatabaseTest): @@ -1298,18 +1294,17 @@ def test_update_availability(self): assert (datetime.datetime.utcnow() - work.last_update_time) < datetime.timedelta(seconds=2) def test_update_availability_triggers_analytics(self): - with temp_analytics("mock_analytics_provider", {}): - work = self._work(with_license_pool=True) - [pool] = work.license_pools - pool.update_availability(30, 20, 2, 0) - provider = Analytics.instance().providers[0] - count = provider.count - pool.update_availability(30, 21, 2, 0) - eq_(count + 1, provider.count) - eq_(CirculationEvent.DISTRIBUTOR_CHECKIN, provider.event_type) - pool.update_availability(30, 21, 2, 1) - eq_(count + 2, provider.count) - eq_(CirculationEvent.DISTRIBUTOR_HOLD_PLACE, provider.event_type) + work = self._work(with_license_pool=True) + [pool] = work.license_pools + provider = MockAnalyticsProvider() + pool.update_availability(30, 20, 2, 0, analytics=provider) + count = provider.count + pool.update_availability(30, 21, 2, 0, analytics=provider) + eq_(count + 1, provider.count) + eq_(CirculationEvent.DISTRIBUTOR_CHECKIN, provider.event_type) + pool.update_availability(30, 21, 2, 1, analytics=provider) + eq_(count + 2, provider.count) + eq_(CirculationEvent.DISTRIBUTOR_HOLD_PLACE, provider.event_type) def test_update_availability_does_nothing_if_given_no_data(self): """Passing an empty set of data into update_availability is